saper 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111):
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +126 -0
  4. data/Rakefile +17 -0
  5. data/bin/saper +60 -0
  6. data/lib/lib/json_search.rb +54 -0
  7. data/lib/lib/mechanize.rb +26 -0
  8. data/lib/lib/nokogiri.rb +12 -0
  9. data/lib/saper.rb +37 -0
  10. data/lib/saper/actions/append_with.rb +14 -0
  11. data/lib/saper/actions/convert_to_html.rb +14 -0
  12. data/lib/saper/actions/convert_to_json.rb +14 -0
  13. data/lib/saper/actions/convert_to_markdown.rb +13 -0
  14. data/lib/saper/actions/convert_to_time.rb +15 -0
  15. data/lib/saper/actions/convert_to_xml.rb +14 -0
  16. data/lib/saper/actions/create_atom.rb +18 -0
  17. data/lib/saper/actions/fetch.rb +17 -0
  18. data/lib/saper/actions/find.rb +18 -0
  19. data/lib/saper/actions/find_first.rb +16 -0
  20. data/lib/saper/actions/get_attribute.rb +15 -0
  21. data/lib/saper/actions/get_contents.rb +14 -0
  22. data/lib/saper/actions/get_text.rb +14 -0
  23. data/lib/saper/actions/prepend_with.rb +14 -0
  24. data/lib/saper/actions/remove_after.rb +14 -0
  25. data/lib/saper/actions/remove_before.rb +14 -0
  26. data/lib/saper/actions/remove_matching.rb +14 -0
  27. data/lib/saper/actions/remove_tags.rb +15 -0
  28. data/lib/saper/actions/replace.rb +15 -0
  29. data/lib/saper/actions/run_recipe.rb +24 -0
  30. data/lib/saper/actions/run_recipe_and_save.rb +22 -0
  31. data/lib/saper/actions/save.rb +14 -0
  32. data/lib/saper/actions/select_matching.rb +14 -0
  33. data/lib/saper/actions/set_input.rb +19 -0
  34. data/lib/saper/actions/skip_tags.rb +15 -0
  35. data/lib/saper/actions/split.rb +24 -0
  36. data/lib/saper/arguments/attribute.rb +11 -0
  37. data/lib/saper/arguments/recipe.rb +42 -0
  38. data/lib/saper/arguments/text.rb +11 -0
  39. data/lib/saper/arguments/timezone.rb +11 -0
  40. data/lib/saper/arguments/variable.rb +11 -0
  41. data/lib/saper/arguments/xpath.rb +11 -0
  42. data/lib/saper/core/action.rb +209 -0
  43. data/lib/saper/core/argument.rb +106 -0
  44. data/lib/saper/core/browser.rb +87 -0
  45. data/lib/saper/core/dsl.rb +68 -0
  46. data/lib/saper/core/error.rb +47 -0
  47. data/lib/saper/core/item.rb +70 -0
  48. data/lib/saper/core/keychain.rb +18 -0
  49. data/lib/saper/core/logger.rb +74 -0
  50. data/lib/saper/core/namespace.rb +139 -0
  51. data/lib/saper/core/recipe.rb +134 -0
  52. data/lib/saper/core/runtime.rb +237 -0
  53. data/lib/saper/core/type.rb +45 -0
  54. data/lib/saper/items/atom.rb +64 -0
  55. data/lib/saper/items/document.rb +66 -0
  56. data/lib/saper/items/html.rb +85 -0
  57. data/lib/saper/items/json.rb +67 -0
  58. data/lib/saper/items/markdown.rb +36 -0
  59. data/lib/saper/items/nothing.rb +15 -0
  60. data/lib/saper/items/text.rb +54 -0
  61. data/lib/saper/items/time.rb +42 -0
  62. data/lib/saper/items/url.rb +34 -0
  63. data/lib/saper/items/xml.rb +79 -0
  64. data/lib/saper/version.rb +3 -0
  65. data/spec/actions/append_with_spec.rb +30 -0
  66. data/spec/actions/convert_to_html_spec.rb +24 -0
  67. data/spec/actions/convert_to_json_spec.rb +24 -0
  68. data/spec/actions/convert_to_markdown_spec.rb +24 -0
  69. data/spec/actions/convert_to_time_spec.rb +37 -0
  70. data/spec/actions/convert_to_xml_spec.rb +24 -0
  71. data/spec/actions/create_atom_spec.rb +31 -0
  72. data/spec/actions/fetch_spec.rb +7 -0
  73. data/spec/actions/find_first_spec.rb +7 -0
  74. data/spec/actions/find_spec.rb +7 -0
  75. data/spec/actions/get_attribute_spec.rb +7 -0
  76. data/spec/actions/get_contents.rb +7 -0
  77. data/spec/actions/get_text.rb +7 -0
  78. data/spec/actions/prepend_with_spec.rb +30 -0
  79. data/spec/actions/remove_after.rb +7 -0
  80. data/spec/actions/remove_before.rb +7 -0
  81. data/spec/actions/replace_spec.rb +7 -0
  82. data/spec/actions/run_recipe_and_save_spec.tmp.rb +52 -0
  83. data/spec/actions/run_recipe_spec.tmp.rb +53 -0
  84. data/spec/actions/save_spec.rb +7 -0
  85. data/spec/actions/select_matching_spec.rb +7 -0
  86. data/spec/actions/set_input_spec.rb +7 -0
  87. data/spec/actions/skip_tags_spec.rb +7 -0
  88. data/spec/actions/split_spec.rb +7 -0
  89. data/spec/core/action_spec.rb +151 -0
  90. data/spec/core/argument_spec.rb +79 -0
  91. data/spec/core/browser_spec.rb +7 -0
  92. data/spec/core/dsl_spec.rb +7 -0
  93. data/spec/core/item_spec.rb +7 -0
  94. data/spec/core/keychain_spec.rb +7 -0
  95. data/spec/core/logger_spec.rb +7 -0
  96. data/spec/core/namespace_spec.rb +18 -0
  97. data/spec/core/recipe_spec.rb +81 -0
  98. data/spec/core/runtime_spec.rb +165 -0
  99. data/spec/core/type_spec.rb +7 -0
  100. data/spec/items/atom_spec.rb +7 -0
  101. data/spec/items/document_spec.rb +7 -0
  102. data/spec/items/html_spec.rb +7 -0
  103. data/spec/items/json_spec.rb +7 -0
  104. data/spec/items/markdown_spec.rb +7 -0
  105. data/spec/items/nothing_spec.rb +7 -0
  106. data/spec/items/text_spec.rb +17 -0
  107. data/spec/items/time_spec.rb +7 -0
  108. data/spec/items/url_spec.rb +7 -0
  109. data/spec/items/xml_spec.rb +17 -0
  110. data/spec/spec_helper.rb +22 -0
  111. metadata +355 -0
@@ -0,0 +1,11 @@
1
module Saper
  module Arguments
    # Argument type registered for time zone values.
    class Timezone < Argument

      # Reports whether the supplied value is acceptable for this argument.
      # Only the class of the value is checked; its contents are not inspected.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when the value is a String
      def valid?(value)
        value.is_a?(String)
      end

    end
  end
end
@@ -0,0 +1,11 @@
1
module Saper
  module Arguments
    # Argument type registered for variable names.
    class Variable < Argument

      # Reports whether the supplied value is acceptable for this argument.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when the value is a String or a Symbol
      def valid?(value)
        [String, Symbol].any? { |klass| value.is_a?(klass) }
      end

    end
  end
end
@@ -0,0 +1,11 @@
1
module Saper
  module Arguments
    # Argument type registered for XPath expressions.
    class XPath < Argument

      # Reports whether the supplied value is acceptable for this argument.
      # Only the class of the value is checked; the expression itself is not
      # parsed or validated here.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when the value is a String
      def valid?(value)
        value.is_a?(String)
      end

    end
  end
end
@@ -0,0 +1,209 @@
1
module Saper
  # Base class for actions. Subclasses register themselves under an
  # underscored type name, declare their arguments and accepted input types
  # at class level, and store the processing logic as a Proc via `.run`.
  class Action

    # Registers a freshly defined subclass under its underscored type name.
    # @param base [Class] the class that has just been defined
    # @return [Class]
    def self.inherited(base)
      subclasses[base.type] = base
    end

    # Returns the registry of subclasses, keyed by type name.
    # @return [Hash{String => Class}]
    def self.subclasses
      @subclasses ||= {}
    end

    # Returns class name as an underscored string
    # (the last namespace segment only).
    # @return [String]
    def self.type
      name.split("::").last.gsub(/([a-z])([A-Z])/,'\1_\2').downcase
    end

    # Returns a subclass with specified type.
    # @param type [Symbol, String] action type
    # @return [Class]
    # @raise [ActionNotFound] when no subclass is registered for the type
    def self.[](type)
      subclasses[type.to_s] || raise(ActionNotFound, type)
    end

    # Defines a new argument.
    # @param type [Symbol] argument type (e.g. :text, :xpath)
    # @param options [Hash] argument options (e.g. :optional => true)
    # @return [Array<Hash>] the updated list of argument definitions
    # @raise [InvalidType] when no Argument subclass exists for the type
    def self.argument(type, options = {})
      unless Argument.exists?(type)
        raise(InvalidType, "Invalid action argument: %s" % type)
      end
      arguments.push options.merge(:type => type)
    end

    # Returns a list of argument definitions assigned to this action.
    # @return [Array<Hash>]
    def self.arguments
      @arguments ||= []
    end

    # Sets (one) or returns (all) acceptable input types.
    # Without an argument the registered input types are returned. With
    # `:anything`, every known Item type is registered. Otherwise the
    # input type is mapped to `options[:returns]` (or to itself).
    # @param input [Symbol, nil]
    # @param options [Hash] may carry :returns, the output type
    # @return [Array<Symbol>, Object] input types when reading, otherwise
    #   the registered output type(s)
    # @raise [InvalidInput] when the input or (Symbol) output type is unknown
    def self.accepts(input = nil, options = {})
      if input.nil?
        return @types.nil? ? [] : @types.keys
      end
      if input == :anything
        return Item.subclasses.keys.map { |type| accepts(type.to_sym, options) }
      end
      output = options[:returns] || input
      unless Item.exists?(input)
        raise(InvalidInput, "Invalid input type: %s" % input)
      end
      if output.is_a?(Symbol) && !Item.exists?(output)
        raise(InvalidInput, "Invalid output type: %s" % output)
      end
      @types ||= {}
      @types[input] = output
    end

    # Returns `true` if action accepts specified type as input.
    # @param type [Symbol]
    # @return [Boolean]
    def self.accepts?(type)
      @types.nil? ? false : @types.key?(type)
    end

    # Saves Proc that encapsulates action logic and will be used later for
    # data processing. Only the first block given is kept; calling without
    # a block returns the stored Proc (or nil).
    # @return [Proc, nil]
    def self.run(&block)
      @block ||= block
    end

    # Returns `true` if action returns multiple items. Note that this method
    # will report incorrect data for some actions. Use #multiple? instead.
    # @return [Boolean]
    def self.returns_multiple_items?
      @multiple == true
    end

    # Sets a flag, indicating that this action returns multiple items.
    # @return [void]
    def self.returns_multiple_items!
      @multiple = true
    end

    # Builds action(s) from serialized data.
    # @param data [Hash, Array<Hash>] hash with :type and :args, or a list
    #   of such hashes
    # @param namespace [Object, nil] passed on as the :namespace option
    # @yield [action] each newly built action
    # @return [Saper::Action, Array<Saper::Action>]
    # @raise [InvalidAction] when data is neither a Hash nor an Array
    def self.unserialize(data, namespace = nil, &block)
      if data.is_a?(Array)
        # Forward the block so it is honoured for every element, exactly as
        # it is for a single Hash.
        return data.map { |item| unserialize(item, namespace, &block) }
      end
      unless data.is_a?(Hash)
        raise InvalidAction.new(data)
      end
      new(data[:type], *data[:args], :namespace => namespace, &block)
    end

    # Returns a new action instance. When called on Action itself, the first
    # argument selects the subclass to instantiate.
    # @return [Saper::Action]
    def self.new(*args, &block)
      if self == Action
        self[args.shift].new(*args, &block)
      else
        super(*args, &block)
      end
    end

    attr_reader :options

    # Builds the action and one Argument per class-level definition,
    # taking values positionally from `args`. A trailing Hash is treated
    # as the options hash.
    # @yield [action] the new instance
    # @return [Saper::Action]
    def initialize(*args)
      @arguments = []
      @options   = {}
      if args.last.is_a?(Hash)
        @options = args.pop
      end
      self.class.arguments.each_with_index do |definition, index|
        # Non-destructive merge: the definition hash is shared by every
        # instance of this class, so it must never receive per-instance
        # :value / :action entries.
        settings = definition.merge(:value => args[index], :action => self)
        @arguments << Argument.new(settings[:type], settings)
      end
      if block_given?
        yield self
      end
    end

    # Runs action and returns results.
    # Input that is not an Item is converted with the first accepted type
    # for which `Item.try` yields a value; nil becomes Items::Nothing.
    # @param input [Object] input
    # @param runtime [Object, nil] when given, the action block is executed
    #   in the context of this object
    # @return [Object] depends on action type
    # @raise [InvalidInput] when the input type is not accepted
    # @raise [RuntimeMissing] when run without a runtime and the block
    #   raises NameError
    def run(input = nil, runtime = nil)
      unless input.is_a?(Item)
        input = self.class.accepts.map { |type| Item.try(type, input) }.compact.first
      end
      if input.nil?
        input = Items::Nothing.new
      end
      unless self.class.accepts?(input.type)
        raise(InvalidInput, input)
      end
      if runtime.nil?
        begin
          block.call(input, *args)
        rescue NameError
          raise RuntimeMissing
        end
      else
        runtime.instance_exec(input, *args, &self.block)
      end
    end

    # Returns human readable action name.
    # @return [String]
    def name
      self.class.name.split("::").last.gsub(/([a-z])([A-Z])/,'\1 \2')
    end

    # Returns the :namespace option when it is a Namespace, otherwise nil.
    # @return [Namespace, nil]
    def namespace
      @options[:namespace].is_a?(Namespace) ? @options[:namespace] : nil
    end

    # Returns a list of data types that are accepted as input.
    # @return [Array<Symbol>]
    def requires
      self.class.accepts
    end

    # Returns values of action arguments.
    # @return [Array]
    def args
      @arguments.map(&:value)
    end

    # Returns a serialized representation of this action.
    # @return [Hash]
    def serialize
      { :type => self.class.type, :args => @arguments.map(&:serialize) }
    end

    # Returns `true` if action returns multiple items.
    # @return [Boolean]
    def multiple?
      self.class.returns_multiple_items?
    end

    # Returns Proc that encapsulates action logic (i.e. processes data).
    # Falls back to a Proc returning its input unchanged.
    # @return [Proc]
    def block
      self.class.run || Proc.new { |input, *args| input }
    end

    # Returns a tab-indented line with the action type and its arguments.
    # @return [String]
    def to_string
      "\t%s %s" % [self.class.type, @arguments.map(&:to_string).join(", ")]
    end

  end
end
@@ -0,0 +1,106 @@
1
module Saper
  # Base class for action arguments. Subclasses register themselves under
  # an underscored type name and may override #valid? and #normalize.
  class Argument

    # Registers a freshly defined subclass under its underscored type name.
    # @param base [Class] the class that has just been defined
    # @return [Class]
    def self.inherited(base)
      subclasses[base.type] = base
    end

    # Returns the registry of subclasses, keyed by type name.
    # @return [Hash{String => Class}]
    def self.subclasses
      @subclasses ||= {}
    end

    # Returns class name as an underscored string
    # (the last namespace segment only).
    # @return [String]
    def self.type
      name.split("::").last.gsub(/([a-z])([A-Z])/,'\1_\2').downcase
    end

    # Returns a subclass with specified type.
    # @param type [Symbol, String] argument type
    # @return [Class]
    # @raise [InvalidType] when no subclass is registered for the type
    def self.[](type)
      subclasses[type.to_s] || raise(InvalidType, "Invalid argument: %s" % type)
    end

    # Returns `true` if there is a subclass with specified type.
    # @param type [Symbol, String] argument type
    # @return [Boolean]
    def self.exists?(type)
      subclasses.key?(type.to_s)
    end

    # Returns a new argument instance. When called on Argument itself, the
    # first argument selects the subclass to instantiate.
    # @return [Saper::Argument]
    def self.new(*args, &block)
      if self == Argument
        self[args.shift].new(*args, &block)
      else
        super(*args, &block)
      end
    end

    # Builds the argument from an options hash. The :value entry, if the
    # argument is mandatory or the value is not nil, is removed from the
    # stored options and assigned through #set.
    # @param opts [Hash] options such as :value, :optional, :action
    # @raise [InvalidArgument] when the value fails validation
    def initialize(opts = {})
      @value = nil
      # Work on a private copy: the caller's hash (often a definition shared
      # by several arguments) must not lose its :value key or become aliased
      # to this instance.
      @opts  = opts.dup
      if mandatory? || !@opts[:value].nil?
        set @opts.delete(:value)
      end
    end

    # Validates, normalizes and stores a value.
    # @param value [Object]
    # @return [Saper::Argument] self
    # @raise [InvalidArgument] when #valid? rejects the value
    def set(value)
      raise InvalidArgument, value unless valid?(value)
      @value = normalize(value)
      self
    end

    # Reports whether the value is acceptable. The base implementation
    # accepts everything.
    # @param value [Object]
    # @return [Boolean]
    def valid?(value)
      true
    end

    # Converts a validated value into its stored form. The base
    # implementation returns the value unchanged.
    # @param value [Object]
    # @return [Object]
    def normalize(value)
      value
    end

    # Returns the stored (normalized) value, or nil when none was set.
    # @return [Object, nil]
    def value
      @value
    end

    # Returns the serialized form of this argument (its value).
    # @return [Object, nil]
    def serialize
      value
    end

    # Returns `true` unless the argument was declared optional.
    # @return [Boolean]
    def mandatory?
      not optional?
    end

    # Returns the :action option this argument was built with.
    # @return [Object, nil]
    def action
      @opts[:action]
    end

    # Returns `true` when the :optional option is exactly `true`.
    # @return [Boolean]
    def optional?
      @opts[:optional] == true
    end

    # Returns the value converted to a String and quoted via #inspect.
    # @return [String]
    def to_string
      value.to_s.inspect
    end

  end
end
@@ -0,0 +1,87 @@
1
module Saper
  # Thin wrapper around a Mechanize agent that records requested URLs and
  # keeps running totals of approximate sent / received sizes.
  class Browser

    require 'mechanize'

    # User agent strings selectable through the :agent option.
    USER_AGENTS = {
      :ie6     => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
      :ie7     => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
      :ie8     => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
      :ie9     => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
      :mozilla => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
      :safari  => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
      :iphone  => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
      :ipad    => 'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
      :android => 'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
    }.freeze
    private_constant :USER_AGENTS

    attr_reader :history, :received, :sent

    # Builds the browser and its underlying Mechanize agent.
    # The :agent, :headers and :logger keys are removed from `options`.
    # @param options [Hash] :agent (Symbol), :headers (Hash-like),
    #   :logger (defaults to a new Saper::Logger)
    def initialize(options = {})
      @agent    = options.delete(:agent)
      @headers  = options.delete(:headers)
      @logger   = options.delete(:logger) || Saper::Logger.new
      @history  = []
      @received = 0
      @sent     = 0
      @mech = Mechanize.new do |mech|
        mech.robots          = false
        mech.user_agent      = agent
        mech.request_headers = headers
        # NOTE(review): presumably clears Mechanize's built-in parsers so
        # responses are handed over unparsed; confirm against Mechanize.
        mech.pluggable_parser.csv   = nil
        mech.pluggable_parser.html  = nil
        mech.pluggable_parser.xhtml = nil
        mech.pluggable_parser.xml   = nil
      end
      # Count the stringified header hash plus the body size of each request.
      @mech.pre_connect_hook do |_mech, request|
        @sent += request.to_hash.to_s.size
        @sent += request.body.nil? ? 0 : request.body.size
      end
      # Count the stringified header hash plus the body size of each response.
      @mech.post_connect_hook do |_mech, _uri, response, payload|
        @received += response.to_hash.to_s.size
        @received += payload.size
      end
    end

    # Returns number of HTTP requests (URLs recorded in the history).
    # @return [Integer]
    def requests
      @history.size
    end

    # Returns the configured request headers, or an empty Hash when the
    # :headers option does not respond to #to_hash.
    # @return [Object]
    def headers
      if @headers.respond_to?(:to_hash)
        @headers
      else
        {}
      end
    end

    # Logs and records the URL, then performs a GET through Mechanize.
    # @param url [Object] target URL
    # @param query [Hash] passed to Mechanize#get
    # @return [Saper::Items::Document]
    def get(url, query = {})
      @logger.download(url)
      @history << url
      Saper::Items::Document.new(@mech.get(url, query))
    end

    # Logs and records the URL, then performs a POST through Mechanize.
    # @param url [Object] target URL
    # @param query [Hash] passed to Mechanize#post
    # @return [Saper::Items::Document]
    def post(url, query = {})
      @logger.download(url)
      @history << url
      Saper::Items::Document.new(@mech.post(url, query))
    end

    # Returns the User-Agent string for the configured :agent option,
    # falling back to a Saper-specific string carrying Saper::VERSION.
    # @return [String]
    def agent
      USER_AGENTS.fetch(@agent) do
        'Mozilla/5.0 (compatible; Saper Ruby client %s)' % Saper::VERSION
      end
    end

  end
end
@@ -0,0 +1,68 @@
1
module Saper
  # Block-based syntax for declaring recipes. Extend (or include) to gain
  # the `recipe`, `run`, `run_by_default` and `[]` helpers from Methods.
  module DSL

    # Returns an anonymous module extended with the DSL methods.
    # @return [Module]
    def self.new
      Module.new.extend(Methods)
    end

    # Extends the including class/module with the DSL methods.
    # @param base [Module]
    # @return [Module]
    def self.included(base)
      base.extend(Methods)
    end

    module Methods

      # Returns the namespace that collects recipes declared through this
      # DSL, creating it on first use.
      # @return [Saper::Namespace]
      def namespace
        @namespace ||= Saper::Namespace.new
      end

      # Declares a recipe and stores it in the namespace under its ID.
      # @param id [Symbol] recipe ID
      # @param name [String, nil] recipe name
      # @return [Object] the value stored in the namespace
      def recipe(id, name = nil, &block)
        namespace[id] = Recipe.parse(id, name, :namespace => namespace, &block)
      end

      # Looks up an entry in the namespace.
      # @param name [Symbol]
      def [](name)
        namespace[name]
      end

      # Delegates to Namespace#run_by_default.
      def run_by_default(*args)
        namespace.run_by_default(*args)
      end

      # Delegates to Namespace#run.
      def run(*args)
        namespace.run(*args)
      end

    end

    # Proxy that evaluates a recipe block, turning every unknown method call
    # inside it into an action appended to the wrapped Saper::Recipe.
    class Recipe
      # Parses block and returns a Recipe instance or a proxy object.
      # If namespace is specified within options, full initialization of
      # Recipe is delayed and a proxy object is returned (which supports
      # `#to_recipe`).
      # @param id [Symbol] recipe ID
      # @param name [String, nil] recipe name
      # @param options [Hash] may carry :namespace
      # @return [Saper::Recipe, Saper::DSL::Recipe]
      def self.parse(id, name = nil, options = {}, &block)
        proxy = new(id, options.merge(:name => name), &block)
        options[:namespace].is_a?(Namespace) ? proxy : proxy.to_recipe
      end

      attr_reader :recipe

      # @param id [Symbol, nil] recipe ID
      # @param options [Hash] passed to Saper::Recipe.new
      def initialize(id = nil, options = {}, &block)
        @recipe = Saper::Recipe.new(id, options)
        @block  = block
      end

      # Evaluates the stored block (once, while the recipe is still empty)
      # and returns the wrapped recipe. A proxy built without a block simply
      # returns its empty recipe instead of raising.
      # @return [Saper::Recipe]
      def to_recipe
        instance_eval(&@block) if @block && recipe.empty?
        recipe
      end

      # Treats the method name as an action type and appends the resulting
      # action to the recipe.
      # @return [Object] result of appending to the recipe
      def method_missing(name, *args, &block)
        @recipe << Saper::Action.new(name, *args, :namespace => @recipe.namespace, &block)
      end

      # Reports only registered action types as supported, so that implicit
      # conversion probes (e.g. `to_ary`) are not routed into #method_missing.
      # @return [Boolean]
      def respond_to_missing?(name, include_private = false)
        Saper::Action.subclasses.key?(name.to_s) || super
      end

    end
  end
end