saper 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (111) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +126 -0
  4. data/Rakefile +17 -0
  5. data/bin/saper +60 -0
  6. data/lib/lib/json_search.rb +54 -0
  7. data/lib/lib/mechanize.rb +26 -0
  8. data/lib/lib/nokogiri.rb +12 -0
  9. data/lib/saper.rb +37 -0
  10. data/lib/saper/actions/append_with.rb +14 -0
  11. data/lib/saper/actions/convert_to_html.rb +14 -0
  12. data/lib/saper/actions/convert_to_json.rb +14 -0
  13. data/lib/saper/actions/convert_to_markdown.rb +13 -0
  14. data/lib/saper/actions/convert_to_time.rb +15 -0
  15. data/lib/saper/actions/convert_to_xml.rb +14 -0
  16. data/lib/saper/actions/create_atom.rb +18 -0
  17. data/lib/saper/actions/fetch.rb +17 -0
  18. data/lib/saper/actions/find.rb +18 -0
  19. data/lib/saper/actions/find_first.rb +16 -0
  20. data/lib/saper/actions/get_attribute.rb +15 -0
  21. data/lib/saper/actions/get_contents.rb +14 -0
  22. data/lib/saper/actions/get_text.rb +14 -0
  23. data/lib/saper/actions/prepend_with.rb +14 -0
  24. data/lib/saper/actions/remove_after.rb +14 -0
  25. data/lib/saper/actions/remove_before.rb +14 -0
  26. data/lib/saper/actions/remove_matching.rb +14 -0
  27. data/lib/saper/actions/remove_tags.rb +15 -0
  28. data/lib/saper/actions/replace.rb +15 -0
  29. data/lib/saper/actions/run_recipe.rb +24 -0
  30. data/lib/saper/actions/run_recipe_and_save.rb +22 -0
  31. data/lib/saper/actions/save.rb +14 -0
  32. data/lib/saper/actions/select_matching.rb +14 -0
  33. data/lib/saper/actions/set_input.rb +19 -0
  34. data/lib/saper/actions/skip_tags.rb +15 -0
  35. data/lib/saper/actions/split.rb +24 -0
  36. data/lib/saper/arguments/attribute.rb +11 -0
  37. data/lib/saper/arguments/recipe.rb +42 -0
  38. data/lib/saper/arguments/text.rb +11 -0
  39. data/lib/saper/arguments/timezone.rb +11 -0
  40. data/lib/saper/arguments/variable.rb +11 -0
  41. data/lib/saper/arguments/xpath.rb +11 -0
  42. data/lib/saper/core/action.rb +209 -0
  43. data/lib/saper/core/argument.rb +106 -0
  44. data/lib/saper/core/browser.rb +87 -0
  45. data/lib/saper/core/dsl.rb +68 -0
  46. data/lib/saper/core/error.rb +47 -0
  47. data/lib/saper/core/item.rb +70 -0
  48. data/lib/saper/core/keychain.rb +18 -0
  49. data/lib/saper/core/logger.rb +74 -0
  50. data/lib/saper/core/namespace.rb +139 -0
  51. data/lib/saper/core/recipe.rb +134 -0
  52. data/lib/saper/core/runtime.rb +237 -0
  53. data/lib/saper/core/type.rb +45 -0
  54. data/lib/saper/items/atom.rb +64 -0
  55. data/lib/saper/items/document.rb +66 -0
  56. data/lib/saper/items/html.rb +85 -0
  57. data/lib/saper/items/json.rb +67 -0
  58. data/lib/saper/items/markdown.rb +36 -0
  59. data/lib/saper/items/nothing.rb +15 -0
  60. data/lib/saper/items/text.rb +54 -0
  61. data/lib/saper/items/time.rb +42 -0
  62. data/lib/saper/items/url.rb +34 -0
  63. data/lib/saper/items/xml.rb +79 -0
  64. data/lib/saper/version.rb +3 -0
  65. data/spec/actions/append_with_spec.rb +30 -0
  66. data/spec/actions/convert_to_html_spec.rb +24 -0
  67. data/spec/actions/convert_to_json_spec.rb +24 -0
  68. data/spec/actions/convert_to_markdown_spec.rb +24 -0
  69. data/spec/actions/convert_to_time_spec.rb +37 -0
  70. data/spec/actions/convert_to_xml_spec.rb +24 -0
  71. data/spec/actions/create_atom_spec.rb +31 -0
  72. data/spec/actions/fetch_spec.rb +7 -0
  73. data/spec/actions/find_first_spec.rb +7 -0
  74. data/spec/actions/find_spec.rb +7 -0
  75. data/spec/actions/get_attribute_spec.rb +7 -0
  76. data/spec/actions/get_contents.rb +7 -0
  77. data/spec/actions/get_text.rb +7 -0
  78. data/spec/actions/prepend_with_spec.rb +30 -0
  79. data/spec/actions/remove_after.rb +7 -0
  80. data/spec/actions/remove_before.rb +7 -0
  81. data/spec/actions/replace_spec.rb +7 -0
  82. data/spec/actions/run_recipe_and_save_spec.tmp.rb +52 -0
  83. data/spec/actions/run_recipe_spec.tmp.rb +53 -0
  84. data/spec/actions/save_spec.rb +7 -0
  85. data/spec/actions/select_matching_spec.rb +7 -0
  86. data/spec/actions/set_input_spec.rb +7 -0
  87. data/spec/actions/skip_tags_spec.rb +7 -0
  88. data/spec/actions/split_spec.rb +7 -0
  89. data/spec/core/action_spec.rb +151 -0
  90. data/spec/core/argument_spec.rb +79 -0
  91. data/spec/core/browser_spec.rb +7 -0
  92. data/spec/core/dsl_spec.rb +7 -0
  93. data/spec/core/item_spec.rb +7 -0
  94. data/spec/core/keychain_spec.rb +7 -0
  95. data/spec/core/logger_spec.rb +7 -0
  96. data/spec/core/namespace_spec.rb +18 -0
  97. data/spec/core/recipe_spec.rb +81 -0
  98. data/spec/core/runtime_spec.rb +165 -0
  99. data/spec/core/type_spec.rb +7 -0
  100. data/spec/items/atom_spec.rb +7 -0
  101. data/spec/items/document_spec.rb +7 -0
  102. data/spec/items/html_spec.rb +7 -0
  103. data/spec/items/json_spec.rb +7 -0
  104. data/spec/items/markdown_spec.rb +7 -0
  105. data/spec/items/nothing_spec.rb +7 -0
  106. data/spec/items/text_spec.rb +17 -0
  107. data/spec/items/time_spec.rb +7 -0
  108. data/spec/items/url_spec.rb +7 -0
  109. data/spec/items/xml_spec.rb +17 -0
  110. data/spec/spec_helper.rb +22 -0
  111. metadata +355 -0
@@ -0,0 +1,11 @@
module Saper
  module Arguments
    # Argument subclass for timezone values. Validation only checks that
    # the supplied value is a String.
    class Timezone < Argument

      # Reports whether the value may be assigned to this argument.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when value is a String
      def valid?(value)
        String === value
      end

    end
  end
end
@@ -0,0 +1,11 @@
module Saper
  module Arguments
    # Argument subclass for variable names. A name may be given either as
    # a String or as a Symbol.
    class Variable < Argument

      # Reports whether the value may be assigned to this argument.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when value is a String or a Symbol
      def valid?(value)
        case value
        when String, Symbol then true
        else false
        end
      end

    end
  end
end
@@ -0,0 +1,11 @@
module Saper
  module Arguments
    # Argument subclass for XPath expressions. Validation only checks that
    # the supplied value is a String; the expression itself is not parsed.
    class XPath < Argument

      # Reports whether the value may be assigned to this argument.
      # @param value [Object] candidate value
      # @return [Boolean] `true` when value is a String
      def valid?(value)
        value.kind_of?(String)
      end

    end
  end
end
@@ -0,0 +1,209 @@
module Saper
  # Base class for actions. Subclasses register themselves under their
  # underscored class name, declare arguments and accepted input types via
  # the class-level macros (`argument`, `accepts`, `returns_multiple_items!`)
  # and supply their logic with `run { |input, *args| ... }`.
  class Action

    # Tracks subclasses of Saper::Action by storing each new subclass under
    # its type name.
    # @param base [Class] the class that has just been defined
    # @return [Class]
    def self.inherited(base)
      subclasses[base.type] = base
    end

    # Returns a hash of subclasses keyed by type name.
    # @return [Hash]
    def self.subclasses
      @subclasses ||= {}
    end

    # Returns class name as an underscored string
    # (e.g. a class named `AppendWith` has type "append_with").
    # @return [String]
    def self.type
      name.split("::").last.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
    end

    # Returns a subclass with specified type.
    # @param type [Symbol, String] action type
    # @return [Class] the registered subclass
    # @raise [ActionNotFound] when no subclass is registered under that type
    def self.[](type)
      subclasses[type.to_s] || raise(ActionNotFound, type)
    end

    # Defines a new argument.
    # @param type [Symbol] argument type (e.g. :text, :xpath)
    # @param options [Hash] argument options (e.g. :optional => true)
    # @return [void]
    # @raise [InvalidType] when no Argument subclass exists for the type
    def self.argument(type, options = {})
      unless Argument.exists?(type)
        raise(InvalidType, "Invalid action argument: %s" % type)
      end
      arguments.push options.merge(:type => type)
    end

    # Returns a list of argument definitions assigned to this action.
    # @return [Array<Hash>]
    def self.arguments
      @arguments ||= []
    end

    # Sets (one) or returns (all) acceptable input types.
    # Called without arguments it returns the registered input types;
    # called with `:anything` it registers every known Item type.
    # @param input [Symbol, nil] input type to register
    # @param options [Hash] `:returns` names the output type produced for
    #   this input (defaults to the input type itself)
    # @return [Array<Symbol>]
    # @raise [InvalidInput] when the input or output type is unknown
    def self.accepts(input = nil, options = {})
      if input.nil?
        return (@types || {}).keys
      end
      if input == :anything
        return Item.subclasses.keys.map { |type| accepts(type.to_sym, options) }
      end
      output = options[:returns] || input
      unless Item.exists?(input)
        raise(InvalidInput, "Invalid input type: %s" % input)
      end
      if output.is_a?(Symbol) && !Item.exists?(output)
        raise(InvalidInput, "Invalid output type: %s" % output)
      end
      @types ||= {}
      @types[input] = output
    end

    # Returns `true` if action accepts specified type as input.
    # @param type [Symbol]
    # @return [Boolean]
    def self.accepts?(type)
      @types.nil? ? false : @types.key?(type)
    end

    # Saves Proc that encapsulates action logic and will be used later for
    # data processing. Only the first block is kept; calling without a block
    # returns the stored Proc (or nil).
    # @return [Proc, nil]
    def self.run(&block)
      @block ||= block
    end

    # Returns `true` if action returns multiple items. Note that this method
    # will report incorrect data for some actions. Use #multiple? instead.
    # @return [Boolean]
    def self.returns_multiple_items?
      @multiple == true
    end

    # Sets a flag, indicating that this action returns multiple items.
    # @return [void]
    def self.returns_multiple_items!
      @multiple = true
    end

    # Returns a new instance of Saper::Action built from serialized data.
    # An Array is unserialized element by element; the optional block is
    # yielded every action that gets created (for Arrays as well).
    # @param data [Hash, Array<Hash>] `{ :type => ..., :args => [...] }`
    # @param namespace [Namespace, nil] namespace handed to each action
    # @return [Saper::Action, Array<Saper::Action>]
    # @raise [InvalidAction] when data is neither a Hash nor an Array
    def self.unserialize(data, namespace = nil, &block)
      if data.is_a?(Array)
        # Forward the block so nested items are yielded too.
        return data.map { |item| unserialize(item, namespace, &block) }
      end
      unless data.is_a?(Hash)
        raise InvalidAction.new(data)
      end
      new(data[:type], *data[:args], :namespace => namespace, &block)
    end

    # Returns a new instance of Saper::Action. When called on Action itself
    # the first argument selects the subclass to instantiate.
    # @return [Saper::Action]
    def self.new(*args, &block)
      if self == Action
        self[args.shift].new(*args, &block)
      else
        super(*args, &block)
      end
    end

    attr_reader :options

    # Returns a new instance of Saper::Action.
    # A trailing Hash is taken as the options; remaining values are matched
    # positionally against the class-level argument definitions.
    # @yield [Saper::Action] the freshly initialized action
    # @return [Saper::Action]
    def initialize(*args)
      @options = args.last.is_a?(Hash) ? args.pop : {}
      @arguments = self.class.arguments.each_with_index.map do |definition, i|
        # Non-destructive merge: the definition hash is shared by every
        # instance of this class and must not pick up per-instance data.
        opts = definition.merge(:value => args[i], :action => self)
        Argument.new(opts[:type], opts)
      end
      if block_given?
        yield self
      end
    end

    # Runs action and returns results.
    # Input that is not an Item is coerced with the first accepted type that
    # Item.try can build; nil input becomes Items::Nothing.
    # @param input [object] input
    # @param runtime [Object, nil] when given, the action block is executed
    #   in the context of this object
    # @return [void] depends on action type
    # @raise [InvalidInput] when the input type is not accepted
    # @raise [RuntimeMissing] when the block raises NameError while running
    #   without a runtime
    def run(input = nil, runtime = nil)
      unless input.is_a?(Item)
        input = self.class.accepts.map { |type| Item.try(type, input) }.compact.first
      end
      if input.nil?
        input = Items::Nothing.new
      end
      unless self.class.accepts?(input.type)
        raise(InvalidInput, input)
      end
      if runtime.nil?
        begin
          block.call(input, *args)
        rescue NameError
          # NOTE(review): presumably the block called a runtime helper that
          # does not exist outside a runtime - confirm.
          raise RuntimeMissing
        end
      else
        runtime.instance_exec(input, *args, &self.block)
      end
    end

    # Returns human readable action name (class name split into words).
    # @return [String]
    def name
      self.class.name.split("::").last.gsub(/([a-z])([A-Z])/, '\1 \2')
    end

    # Returns Saper::Namespace instance passed in the options, if any.
    # @return [Namespace, nil]
    def namespace
      @options[:namespace].is_a?(Namespace) ? @options[:namespace] : nil
    end

    # Returns a list of data types that are accepted as input.
    # @return [Array<Symbol>]
    def requires
      self.class.accepts
    end

    # Returns values of action arguments.
    # @return [Array]
    def args
      @arguments.map(&:value)
    end

    # Returns a serialized representation of this action.
    # @return [Hash]
    def serialize
      { :type => self.class.type, :args => @arguments.map(&:serialize) }
    end

    # Returns `true` if action returns multiple items.
    # @return [Boolean]
    def multiple?
      self.class.returns_multiple_items?
    end

    # Returns Proc that encapsulates action logic (i.e. processes data).
    # Falls back to a Proc returning its input when the class defines none.
    # @return [Proc]
    def block
      self.class.run || Proc.new { |input, *args| input }
    end

    # Returns a one-line, tab-indented textual form: type and arguments.
    # @return [String]
    def to_string
      "\t%s %s" % [self.class.type, @arguments.map(&:to_string).join(", ")]
    end

  end
end
@@ -0,0 +1,106 @@
module Saper
  # Base class for action arguments. Subclasses register themselves under
  # their underscored class name and may override #valid? and #normalize.
  class Argument

    # Tracks subclasses of Saper::Argument by storing each new subclass
    # under its type name.
    # @param base [Class] the class that has just been defined
    # @return [Class]
    def self.inherited(base)
      subclasses[base.type] = base
    end

    # Returns a hash of subclasses keyed by type name.
    # @return [Hash]
    def self.subclasses
      @subclasses ||= {}
    end

    # Returns class name as an underscored string.
    # @return [String]
    def self.type
      name.split("::").last.gsub(/([a-z])([A-Z])/, '\1_\2').downcase
    end

    # Returns a subclass with specified type.
    # @param type [Symbol, String] argument type
    # @return [Class] the registered subclass
    # @raise [InvalidType] when no subclass is registered under that type
    def self.[](type)
      subclasses[type.to_s] || raise(InvalidType, "Invalid argument: %s" % type)
    end

    # Returns `true` if there is a subclass with specified type.
    # @param type [Symbol, String] argument type
    # @return [Boolean]
    def self.exists?(type)
      subclasses.key?(type.to_s)
    end

    # Returns a new instance of Saper::Argument. When called on Argument
    # itself the first argument selects the subclass to instantiate.
    # @return [Saper::Argument]
    def self.new(*args, &block)
      if self == Argument
        self[args.shift].new(*args, &block)
      else
        super(*args, &block)
      end
    end

    # Builds the argument from an options hash. The `:value` entry is
    # validated and assigned when the argument is mandatory or a non-nil
    # value was supplied; an optional argument without a value stays nil.
    # @param opts [Hash] options such as :value, :optional, :action
    # @raise [InvalidArgument] when the value fails validation
    def initialize(opts = {})
      @value = nil
      # Work on a copy so the caller's hash is not modified by the delete.
      @opts = opts.dup
      if mandatory? || !@opts[:value].nil?
        set @opts.delete(:value)
      end
    end

    # Validates, normalizes and stores a new value.
    # @param value [Object] value to assign
    # @return [Saper::Argument] self
    # @raise [InvalidArgument] when #valid? rejects the value
    def set(value)
      raise InvalidArgument, value unless valid?(value)
      @value = normalize(value)
      self
    end

    # Reports whether a value is acceptable. The base implementation
    # accepts everything; subclasses override it.
    # @param value [Object] candidate value
    # @return [Boolean]
    def valid?(value)
      true
    end

    # Converts a valid value to its stored form. The base implementation
    # returns the value unchanged.
    # @param value [Object] validated value
    # @return [Object]
    def normalize(value)
      value
    end

    # Returns the stored (normalized) value.
    # @return [Object, nil]
    def value
      @value
    end

    # Returns the serialized form of this argument, which is its value.
    # @return [Object, nil]
    def serialize
      value
    end

    # Returns `true` unless the argument was declared optional.
    # @return [Boolean]
    def mandatory?
      !optional?
    end

    # Returns the `:action` entry of the options.
    # @return [Object, nil]
    def action
      @opts[:action]
    end

    # Returns `true` if the argument was declared with `:optional => true`.
    # @return [Boolean]
    def optional?
      @opts[:optional] == true
    end

    # Returns the value as a quoted, escaped string.
    # @return [String]
    def to_string
      value.to_s.inspect
    end

  end
end
@@ -0,0 +1,87 @@
module Saper
  # Thin wrapper around a Mechanize agent that records requested URLs and
  # keeps running totals of the data sent and received.
  class Browser

    require 'mechanize'

    # User-agent strings selectable through the `:agent` option.
    USER_AGENTS = {
      :ie6     => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
      :ie7     => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
      :ie8     => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
      :ie9     => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
      :mozilla => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
      :safari  => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
      :iphone  => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
      :ipad    => 'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
      :android => 'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13'
    }.freeze

    attr_reader :history, :received, :sent

    # Creates the browser and its underlying Mechanize agent.
    # The options hash is read but not modified.
    # @param options [Hash] `:agent` (user-agent key, see USER_AGENTS),
    #   `:headers` (extra request headers), `:logger` (defaults to a new
    #   Saper::Logger)
    def initialize(options = {})
      @agent    = options[:agent]
      @headers  = options[:headers]
      @logger   = options[:logger] || Saper::Logger.new
      @history  = []
      @received = 0
      @sent     = 0
      @mech = Mechanize.new do |a|
        a.robots = false
        a.user_agent = agent
        a.request_headers = headers
        # NOTE(review): parsers are unset presumably so that responses are
        # handed over unparsed - confirm against Items::Document.
        a.pluggable_parser.csv   = nil
        a.pluggable_parser.html  = nil
        a.pluggable_parser.xhtml = nil
        a.pluggable_parser.xml   = nil
      end
      # NOTE(review): block-taking pre_connect_hook/post_connect_hook are not
      # defined in this file - presumably added by the project's Mechanize
      # extension; confirm.
      @mech.pre_connect_hook do |agent, req|
        @sent += req.to_hash.to_s.size
        @sent += (req.body.nil? ? 0 : req.body.size)
      end
      @mech.post_connect_hook do |agent, uri, resp, body|
        @received += resp.to_hash.to_s.size
        # Guard against a missing body, mirroring the request-side hook.
        @received += (body.nil? ? 0 : body.size)
      end
    end

    # Returns number of HTTP requests issued through #get and #post.
    # @return [Integer]
    def requests
      @history.size
    end

    # Returns the extra request headers, or an empty Hash when the
    # configured value does not respond to `to_hash`.
    # @return [Hash]
    def headers
      @headers.respond_to?(:to_hash) ? @headers : {}
    end

    # Logs and records the URL, performs a GET request and wraps the
    # response.
    # @param url [String] address to request
    # @param query [Hash] request parameters
    # @return [Saper::Items::Document]
    def get(url, query = {})
      request(:get, url, query)
    end

    # Logs and records the URL, performs a POST request and wraps the
    # response.
    # @param url [String] address to request
    # @param query [Hash] request parameters
    # @return [Saper::Items::Document]
    def post(url, query = {})
      request(:post, url, query)
    end

    # Returns the user-agent string for the configured `:agent` key, or a
    # Saper-specific string carrying Saper::VERSION for any other value.
    # @return [String]
    def agent
      USER_AGENTS.fetch(@agent) do
        'Mozilla/5.0 (compatible; Saper Ruby client %s)' % Saper::VERSION
      end
    end

    private

    # Shared implementation of #get and #post.
    # @param verb [Symbol] :get or :post
    # @return [Saper::Items::Document]
    def request(verb, url, query)
      @logger.download(url)
      @history.push url
      data = @mech.public_send(verb, url, query)
      Saper::Items::Document.new data
    end

  end
end
@@ -0,0 +1,68 @@
module Saper
  # Block-based DSL for declaring recipes. Use `Saper::DSL.new` for a
  # stand-alone container or `include Saper::DSL` to add the DSL methods to
  # an existing class or module.
  module DSL

    # Returns a fresh anonymous module extended with the DSL methods.
    # @return [Module]
    def self.new
      Module.new.extend(Methods)
    end

    # Adds the DSL methods as singleton methods of the including class.
    # @param base [Module] class or module that includes Saper::DSL
    # @return [Module]
    def self.included(base)
      base.extend(Methods)
    end

    # Methods made available by the DSL; all of them delegate to a lazily
    # created Saper::Namespace.
    module Methods

      # Returns the namespace that stores the declared recipes.
      # @return [Saper::Namespace]
      def namespace
        @namespace ||= Saper::Namespace.new
      end

      # Declares a recipe and stores it in the namespace under `id`.
      # @param id [Symbol] recipe ID
      # @param name [String, nil] recipe name
      # @yield block listing the recipe's actions
      def recipe(id, name = nil, &block)
        namespace[id] = Recipe.parse(id, name, :namespace => namespace, &block)
      end

      # Looks up an entry of the namespace.
      # @param name [Symbol] recipe ID
      def [](name)
        namespace[name]
      end

      # Delegates to Namespace#run_by_default.
      def run_by_default(*args)
        namespace.run_by_default(*args)
      end

      # Delegates to Namespace#run.
      def run(*args)
        namespace.run(*args)
      end

    end

    # Proxy that evaluates a recipe block: every unknown method called
    # inside the block is turned into an action appended to the recipe.
    class Recipe
      # Parses block and returns a Recipe instance or a proxy object.
      # If namespace is specified within options, full initialization of
      # Recipe is delayed and a proxy object is returned (which supports
      # `#to_recipe`).
      # @param id [Symbol] recipe ID
      # @param name [String, nil] recipe name
      # @param options [Hash] options passed on to Saper::Recipe
      # @return [Saper::Recipe, Saper::DSL::Recipe]
      def self.parse(id, name = nil, options = {}, &block)
        instance = new(id, options.merge(:name => name), &block)
        options[:namespace].is_a?(Namespace) ? instance : instance.to_recipe
      end

      attr_reader :recipe

      # Creates the underlying Saper::Recipe and remembers the block for
      # later evaluation.
      # @param id [Symbol, nil] recipe ID
      # @param options [Hash] options passed on to Saper::Recipe
      def initialize(id = nil, options = {}, &block)
        @recipe = Saper::Recipe.new(id, options)
        @block  = block
      end

      # Evaluates the stored block (once, while the recipe is still empty)
      # and returns the recipe. A proxy created without a block yields the
      # empty recipe instead of raising.
      # @return [Saper::Recipe]
      def to_recipe
        instance_eval(&@block) if @block && recipe.empty?
        recipe
      end

      # Appends an action named after the missing method to the recipe.
      # Unknown action names are reported by Saper::Action itself.
      # @param name [Symbol] action type
      # @param args [Array] action arguments
      def method_missing(name, *args, &block)
        @recipe << Saper::Action.new(name, *args, :namespace => @recipe.namespace, &block)
      end

      # Keeps `respond_to?` consistent with #method_missing: the proxy
      # responds to the names of registered actions.
      # @param name [Symbol] method name
      # @param include_private [Boolean]
      # @return [Boolean]
      def respond_to_missing?(name, include_private = false)
        Saper::Action.subclasses.key?(name.to_s) || super
      end

    end
  end
end