saxxy 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +5 -0
- data/Gemfile +13 -0
- data/LICENSE +22 -0
- data/README.md +117 -0
- data/Rakefile +12 -0
- data/lib/saxxy.rb +2 -0
- data/lib/saxxy/activatable.rb +160 -0
- data/lib/saxxy/callbacks/libxml.rb +26 -0
- data/lib/saxxy/callbacks/nokogiri.rb +30 -0
- data/lib/saxxy/callbacks/ox.rb +66 -0
- data/lib/saxxy/callbacks/sax.rb +86 -0
- data/lib/saxxy/context.rb +88 -0
- data/lib/saxxy/context_tree.rb +85 -0
- data/lib/saxxy/event.rb +83 -0
- data/lib/saxxy/event_registry.rb +122 -0
- data/lib/saxxy/node_action.rb +59 -0
- data/lib/saxxy/node_rule.rb +90 -0
- data/lib/saxxy/parsers/base.rb +28 -0
- data/lib/saxxy/parsers/libxml.rb +52 -0
- data/lib/saxxy/parsers/nokogiri.rb +28 -0
- data/lib/saxxy/parsers/ox.rb +30 -0
- data/lib/saxxy/service.rb +47 -0
- data/lib/saxxy/utils/agent.rb +66 -0
- data/lib/saxxy/utils/callback_array.rb +27 -0
- data/lib/saxxy/utils/helpers.rb +13 -0
- data/lib/saxxy/version.rb +3 -0
- data/saxxy.gemspec +21 -0
- data/spec/saxxy/activatable_spec.rb +344 -0
- data/spec/saxxy/callbacks/sax_spec.rb +456 -0
- data/spec/saxxy/context_spec.rb +51 -0
- data/spec/saxxy/context_tree_spec.rb +68 -0
- data/spec/saxxy/event_registry_spec.rb +137 -0
- data/spec/saxxy/event_spec.rb +49 -0
- data/spec/saxxy/node_action_spec.rb +46 -0
- data/spec/saxxy/node_rule_spec.rb +99 -0
- data/spec/saxxy/parsers/libxml_spec.rb +104 -0
- data/spec/saxxy/parsers/nokogiri_spec.rb +200 -0
- data/spec/saxxy/parsers/ox_spec.rb +175 -0
- data/spec/saxxy/utils/agent_spec.rb +63 -0
- data/spec/spec_helper.rb +28 -0
- data/spec/support/agent_macros.rb +24 -0
- metadata +155 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
require "forwardable"
|
2
|
+
require "saxxy/utils/callback_array"
|
3
|
+
require "saxxy/event_registry"
|
4
|
+
|
5
|
+
|
6
|
+
module Saxxy
|
7
|
+
module Callbacks
|
8
|
+
|
9
|
+
##
# Parser-agnostic SAX callbacks.
#
# A class that includes this module is expected to be instantiated with a
# root context. The module keeps two pools of contexts (active and inactive)
# plus an event registry, and moves contexts between the pools as start/end
# element notifications arrive.
##
module SAX
  # Hook run when the module gets included. Extends the including class with
  # Forwardable and delegates the event related calls to @event_registry.
  #
  # @param base [Class] the class that includes this module
  #
  def self.included(base)
    base.extend Forwardable
    base.def_delegators :@event_registry, :activate_events_on, :deactivate_events_on,
                        :push_text, :register_event_from_action, :remove_actions!
  end

  # Sets up the internal state and puts the given context in the active pool.
  #
  # @param context [Context] the context that is active from the start
  #
  def initialize(context)
    initialize_state
    @active_pool << context
  end

  # Callback for an opening tag. Events are handled before contexts, so the
  # actions of a context activated by this very tag do not match the tag.
  #
  # @param name [String] the element name
  # @param attrs [Hash] the element attributes
  #
  def on_start_element(name, attrs = {})
    register_and_activate_events_on(name, attrs)
    activate_contexts_on(name, attrs)
  end

  # Callback for character data. A nil string is pushed as an empty string.
  #
  # @param string [String, nil] the characters that were read
  #
  def on_characters(string)
    push_text(string || "")
  end

  # Callback for a closing tag. Events are deactivated before contexts.
  #
  # @param name [String] the element name
  #
  def on_end_element(name)
    deactivate_events_on(name)
    deactivate_contexts_on(name)
  end

  # Callback for the end of the document. Empties both pools and the registry.
  #
  def on_end_document
    @active_pool.clear
    @inactive_pool.clear
    @event_registry.clear
  end

  private

  # Builds the event registry and the two pools and wires the pool callbacks.
  def initialize_state
    @event_registry = EventRegistry.new
    @inactive_pool = CallbackArray.new.on_add(&method(:on_add_to_inactive_pool))
    @active_pool = CallbackArray.new
                                .on_add(&method(:on_add_to_active_pool))
                                .on_remove(&method(:on_remove_from_active_pool))
  end

  # Registers one event for every currently available action that matches the
  # node and then notifies the registry about the opening tag.
  def register_and_activate_events_on(name, attrs)
    matching = actions.select { |candidate| candidate.matches(name, attrs) }
    matching.each { |action| register_event_from_action(action, name, attrs) }
    activate_events_on(name, attrs)
  end

  # Gives every inactive context the chance to activate on the node.
  def activate_contexts_on(name, attrs)
    @inactive_pool.each { |inactive| inactive.activate_on(name, attrs) }
  end

  # Gives every active context the chance to deactivate on the node.
  def deactivate_contexts_on(name)
    @active_pool.each { |active| active.deactivate_on(name) }
  end

  # Pool callback: once the context activates it is moved from the inactive
  # pool to the active pool.
  def on_add_to_inactive_pool(context)
    context.on_activation do |activated|
      @inactive_pool >> activated
      @active_pool << activated
    end
  end

  # Pool callback: arranges for the context to leave the active pool when it
  # deactivates and queues its child contexts in the inactive pool.
  def on_add_to_active_pool(context)
    context.on_deactivation { |deactivated| @active_pool >> deactivated }
    context.child_contexts.each { |child| @inactive_pool << child }
  end

  # Pool callback: a context leaving the active pool goes back to the inactive
  # pool while its parent is still active, and its actions are dropped from
  # the registry.
  def on_remove_from_active_pool(context)
    @inactive_pool << context if @active_pool.member?(context.parent_context)
    remove_actions!(*context.actions)
  end

  # @return [Array] the actions of every context in the active pool
  def actions
    @active_pool.flat_map(&:actions)
  end
end
|
84
|
+
|
85
|
+
end
|
86
|
+
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require "saxxy/activatable"
|
2
|
+
require "saxxy/node_action"
|
3
|
+
|
4
|
+
|
5
|
+
module Saxxy
|
6
|
+
|
7
|
+
##
|
8
|
+
# @author rubymaniac
|
9
|
+
#
|
10
|
+
# Context describes, semantically, an XML tag-context.
|
11
|
+
# For example an XML tag-context: <div>This is a context</div>
|
12
|
+
#
|
13
|
+
# Whether a tag-context is described by a Context object
|
14
|
+
# depends on whether the Context's activation rule matches
|
15
|
+
# this tag-context. Because a Context can be activated
|
16
|
+
# on a tag-context that matches its activation rule
|
17
|
+
# it includes the "Activatable" module.
|
18
|
+
#
|
19
|
+
# A context can belong to a parent context and thus it
|
20
|
+
# may have a `parent_context` attribute that points to
|
21
|
+
# its parent. A context may also have `child_contexts`.
|
22
|
+
#
|
23
|
+
# @!attribute [r] activation_rule
|
24
|
+
# @return [NodeRule] this context's activation rule
|
25
|
+
#
|
26
|
+
# @!attribute [r|w] parent_context
|
27
|
+
# @return [Context] this context's parent
|
28
|
+
#
|
29
|
+
# @!attribute [r] child_contexts
|
30
|
+
# @return [Array<Context>] this context's immediate descendants
|
31
|
+
##
|
32
|
+
class Context
  include Activatable

  attr_accessor :parent_context
  attr_reader :child_contexts, :actions

  # Builds a context with no children and no actions and hands the
  # `activation_rule` (nil by default) over to `initialize_activatable`.
  #
  # @param activation_rule [NodeRule] an instance of NodeRule or nil to
  #   declare that this context is automatically active.
  #
  def initialize(activation_rule = nil)
    @actions = []
    @child_contexts = []
    initialize_activatable(activation_rule)
  end

  # Registers the given object on this context.
  #
  # A Context becomes a child: its `parent_context` is pointed at this
  # context and it is appended to `child_contexts`. A NodeAction is appended
  # to `actions`. Any other object is left alone.
  #
  # @param obj [Context|NodeAction] An instance of Context or
  #   an instance of NodeAction.
  #
  # @return [Context] self, i.e. the context
  #
  def register(obj)
    case obj
    when Context
      obj.parent_context = self
      child_contexts.push(obj)
    when NodeAction
      actions.push(obj)
    end

    self
  end

  # Tells whether this context hangs under another context.
  #
  # @return [Boolean] true if it has a `parent_context`, false otherwise
  #
  def has_parent?
    !parent_context.nil?
  end

  # Tells whether this context is a root context,
  # i.e. whether it lacks a `parent_context`.
  #
  # @return [Boolean] false if it has a `parent_context`, true otherwise
  #
  def root?
    !has_parent?
  end
end
|
87
|
+
|
88
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
require "saxxy/context"
|
2
|
+
require "saxxy/node_action"
|
3
|
+
require "saxxy/node_rule"
|
4
|
+
|
5
|
+
|
6
|
+
module Saxxy
|
7
|
+
|
8
|
+
##
|
9
|
+
# @author rubymaniac
|
10
|
+
#
|
11
|
+
# ContextTree describes the tree of contexts that the user
|
12
|
+
# eventually constructs by constraining NodeActions to be
|
13
|
+
# active under some Context.
|
14
|
+
#
|
15
|
+
#
|
16
|
+
# @!attribute [r|w] root
|
17
|
+
# @return [Context] the root context of the tree
|
18
|
+
#
|
19
|
+
##
|
20
|
+
class ContextTree
  attr_accessor :root

  # Initializes a ContextTree by passing an optional context to be used
  # by the actions in order to execute their action block and a block that
  # will be evaluated in order to create the internal tree structure.
  #
  # When no `ctx` is given the receiver (`self`) of the block is used
  # instead. When neither a `ctx` nor a block is given the tree consists of
  # just an empty root context and the actions' context is nil.
  #
  # @param ctx [Object] an object to be used by the NodeActions
  #   in order to evaluate their block
  #
  # @param block [Proc] a block that will get evaluated and create
  #   the context tree structure
  #
  def initialize(ctx = nil, &block)
    self.root = Saxxy::Context.new
    # The block is optional, so only ask for its binding when it exists.
    # The evaluated string is the constant "self": it just fetches the
    # object the block was defined in.
    @ctx = ctx || (block && eval("self", block.binding))
    eval_subtree!(&block)
  end

  # Creates a Context and uses the arguments to create its activation rule. After
  # creating the context it registers it under the current root context and returns it.
  #
  # @param regexp_or_string [String|Regexp] the activation rule's name
  # @param attributes [Hash] the activation rule's attributes
  # @param block [Proc] a block that will get evaluated and register
  #   the child contexts and the actions
  #
  # @return [Context] the registered Context
  def under(regexp_or_string, attributes = {}, &block)
    rule = Saxxy::NodeRule.new(regexp_or_string, attributes)
    Saxxy::Context.new(rule).tap do |context|
      __register_context(context, &block)
    end
  end

  # Creates a NodeAction and uses the arguments to create its activation rule and registers
  # it under the current root context.
  #
  # @param regexp_or_string [String|Regexp] the activation rule's name
  # @param attributes [Hash] the activation rule's attributes
  # @param block [Proc] the NodeAction's action block; it is handed to the
  #   NodeAction together with the tree's context object
  #
  # @return [NodeAction] the registered NodeAction
  def on(regexp_or_string, attributes = {}, &block)
    rule = Saxxy::NodeRule.new(regexp_or_string, attributes)
    Saxxy::NodeAction.new(rule, @ctx, &block).tap do |action|
      __register_action(action)
    end
  end

  private

  # Evaluates the block (if any) on the tree itself, so that `under` and `on`
  # calls inside it register on the current root, and afterwards steps the
  # current root back up to its parent when it has one.
  def eval_subtree!(&block)
    instance_eval(&block) if block_given?
    self.root = root.parent_context if root.has_parent?
  end

  # Registers the action on the current root context.
  def __register_action(action)
    root.register(action)
  end

  # Registers the context on the current root and makes it the new current
  # root while its block is evaluated. The receiver `root` is evaluated before
  # the argument reassigns it, so the registration happens on the old root.
  def __register_context(context, &block)
    root.register(self.root = context)
    eval_subtree!(&block)
  end
end
|
84
|
+
|
85
|
+
end
|
data/lib/saxxy/event.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require "saxxy/activatable"
|
2
|
+
|
3
|
+
|
4
|
+
module Saxxy
|
5
|
+
|
6
|
+
##
|
7
|
+
# @author rubymaniac
|
8
|
+
#
|
9
|
+
# An Event refers to a specific NodeAction and is what
|
10
|
+
# is registered when a NodeAction matches a specific node.
|
11
|
+
# Because a NodeAction may match more than one node many
|
12
|
+
# events, under this action, should get registered.
|
13
|
+
#
|
14
|
+
#
|
15
|
+
# @!attribute [r] text
|
16
|
+
# @return [String] the text under the matching node
|
17
|
+
#
|
18
|
+
# @!attribute [r] attributes
|
19
|
+
# @return [Hash] the attributes of the matching node
|
20
|
+
#
|
21
|
+
# @!attribute [r] element_name
|
22
|
+
# @return [String] the name of the matching node
|
23
|
+
#
|
24
|
+
# @!attribute [r] action
|
25
|
+
# @return [NodeAction] the underlying NodeAction
|
26
|
+
##
|
27
|
+
class Event
  include Activatable

  attr_reader :text, :attributes, :element_name, :action

  # Builds an Event bound to a NodeAction. The event starts with an empty
  # text, no element name and no attributes, and its activatable state is
  # initialized with the action's activation rule.
  #
  # @param action [NodeAction] this event's action
  #
  def initialize(action)
    @action = action
    initialize_options
    initialize_activatable(action.activation_rule)
  end

  # Adds the given text at the end of the event's text. The text is rebuilt
  # as a new string on every call rather than modified in place.
  #
  # @param text [String] the text to append
  #
  # @return text [String] the event's text
  def append_text(text)
    @text += text
  end

  # Merges the given hash into the event's attributes, in place.
  #
  # @param attrs [Hash] the attributes to merge
  #
  # @return attributes [Hash] the event's attributes
  def merge_attributes(attrs)
    @attributes.update(attrs)
  end

  # Replaces the element_name
  #
  # @param name [String] the new element_name
  #
  # @return element_name [String] the event's element_name
  def set_element_name(name)
    @element_name = name
  end

  # Invokes the action, handing it the text, element_name and attributes
  # (in this order).
  def fire
    action.call(text, element_name, attributes)
  end

  private

  # Resets the data collected for the matching node.
  def initialize_options
    @element_name = nil
    @text = ""
    @attributes = {}
  end
end
|
82
|
+
|
83
|
+
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
require "saxxy/event"
|
2
|
+
|
3
|
+
|
4
|
+
module Saxxy
|
5
|
+
|
6
|
+
##
|
7
|
+
# @author rubymaniac
|
8
|
+
#
|
9
|
+
# The Event Registry is in charge of registering new events
|
10
|
+
# and firing the event callback whenever a specific event gets
|
11
|
+
# deactivated.
|
12
|
+
#
|
13
|
+
# The registry has an @actions instance variable that holds, for every
# registered action, the list of events registered under that action.
|
14
|
+
#
|
15
|
+
##
|
16
|
+
class EventRegistry
  # Builds a registry with no registered actions.
  #
  def initialize
    clear
  end

  # Creates a new event for the action, gives it the element name and the
  # attributes and stores it as the latest event of that action.
  #
  # @param action [NodeAction] the action under which the event is registered
  # @param name [String] the element_name for the event
  # @param attributes [Hash] the attributes for the event
  #
  # @return event [Event] the registered event
  #
  def register_event_from_action(action, name = nil, attributes = {})
    event = new_event_for(action)
    event.set_element_name(name)
    event.merge_attributes(attributes)
    self[action] << event
    event
  end

  # Collects, for every registered action, its most recently registered
  # event. Only these events receive text and (de)activation notifications.
  #
  # @return events [Array] all the active events
  #
  def events
    @actions.each_value.map { |registered| registered.last }
  end

  # Appends the text on every active event
  #
  # @param text [String] the text to append
  #
  # @return events [Array] all the active events
  #
  def push_text(text)
    send_on_each_event(:append_text, text)
  end

  # Sends `deactivate_on` with the element name to every active event.
  #
  # @param element_name [String] the element name
  #
  # @return events [Array] all the active events
  #
  def deactivate_events_on(element_name)
    send_on_each_event(:deactivate_on, element_name)
  end

  # Sends `activate_on` with the element name and attributes to every
  # active event.
  #
  # @param element_name [String] nodes' element name
  # @param attributes [Hash] nodes' attributes
  #
  # @return events [Array] all the active events
  #
  def activate_events_on(element_name, attributes)
    send_on_each_event(:activate_on, element_name, attributes)
  end

  # Drops the given actions, together with their events, from the registry.
  # The dropped events are not fired.
  #
  # @param actions [Array] actions to be removed
  #
  # @return actions [Hash] the registered actions
  #
  def remove_actions!(*actions)
    actions.each do |action|
      @actions.delete(action)
    end
    @actions
  end

  # Forgets every registered action by starting over with an empty hash.
  #
  def clear
    @actions = {}
  end

  private

  # Builds an event for the action whose deactivation callback fires the
  # event and then takes it out of the registry.
  def new_event_for(action)
    Saxxy::Event.new(action).on_deactivation do |finished|
      finished.fire
      unregister_event(action, finished)
    end
  end

  # Removes the event from its action's list. When an earlier event of the
  # same action is still pending, the removed event's text is appended to it;
  # otherwise the action itself is removed from the registry.
  def unregister_event(action, event)
    pending = self[action]
    pending.delete(event)
    previous = pending.last

    if previous
      previous.append_text(event.text)
    else
      @actions.delete(action)
    end
  end

  # Returns the list of events of the action, creating it on first access.
  def [](action)
    @actions[action] ||= []
  end

  # Calls the named public method, with the given arguments, on every
  # active event and returns those events.
  def send_on_each_event(method, *args)
    events.each do |event|
      event.public_send(method, *args)
    end
  end
end
|
121
|
+
|
122
|
+
end
|