imw 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +194 -31
- data/VERSION +1 -1
- data/bin/imw +5 -0
- data/lib/imw/boot.rb +0 -15
- data/lib/imw/dataset/paths.rb +38 -0
- data/lib/imw/dataset/task.rb +21 -18
- data/lib/imw/dataset/workflow.rb +126 -65
- data/lib/imw/dataset.rb +56 -82
- data/lib/imw/files/basicfile.rb +3 -3
- data/lib/imw/files/compressed_files_and_archives.rb +23 -37
- data/lib/imw/files/csv.rb +2 -1
- data/lib/imw/files/directory.rb +62 -0
- data/lib/imw/files/excel.rb +84 -0
- data/lib/imw/files/sgml.rb +4 -23
- data/lib/imw/files.rb +62 -47
- data/lib/imw/packagers/archiver.rb +19 -1
- data/lib/imw/packagers/s3_mover.rb +8 -0
- data/lib/imw/parsers/html_parser/matchers.rb +251 -268
- data/lib/imw/parsers/html_parser.rb +181 -176
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +35 -0
- data/lib/imw/runner.rb +114 -0
- data/lib/imw/utils/extensions/core.rb +0 -16
- data/lib/imw/utils/paths.rb +0 -28
- data/lib/imw.rb +21 -32
- metadata +11 -19
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +0 -37
- data/lib/imw/dataset/datamapper.rb +0 -66
- data/lib/imw/dataset/loaddump.rb +0 -50
- data/lib/imw/dataset/old/file_collection.rb +0 -88
- data/lib/imw/dataset/old/file_collection_utils.rb +0 -71
- data/lib/imw/dataset/scaffold.rb +0 -132
- data/lib/imw/dataset/scraped_uri.rb +0 -305
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +0 -87
- data/lib/imw/dataset/scrub/scrub.rb +0 -147
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +0 -38
- data/lib/imw/dataset/scrub/scrub_test.rb +0 -60
- data/lib/imw/dataset/scrub/slug.rb +0 -101
- data/lib/imw/dataset/stats/counter.rb +0 -23
- data/lib/imw/dataset/stats.rb +0 -73
@@ -26,6 +26,10 @@ module IMW
|
|
26
26
|
add_inputs inputs
|
27
27
|
end
|
28
28
|
|
29
|
+
# FIXME Instead of requiring +new_inputs+ to be either an Array
|
30
|
+
# or Hash just iterate through whatever it is using +each+ and
|
31
|
+
# see if each item can be interpreted as a mapping between
|
32
|
+
# strings.
|
29
33
|
def add_inputs new_inputs
|
30
34
|
@inputs ||= {}
|
31
35
|
if new_inputs.is_a?(Array)
|
@@ -70,6 +74,10 @@ module IMW
|
|
70
74
|
@dir ||= File.join(tmp_dir, name.to_s)
|
71
75
|
end
|
72
76
|
|
77
|
+
# FIXME This needs to be made idempotent -- calling prepare
|
78
|
+
# twice should not do any work the second time (unless the user
|
79
|
+
# is insistent and passes a :force option -- or maybe use bang
|
80
|
+
# and not-bang versions of the method for this distinction).
|
73
81
|
def prepare!
|
74
82
|
FileUtils.mkdir_p dir unless File.exist?(dir)
|
75
83
|
inputs.each_pair do |path, basename|
|
@@ -87,7 +95,17 @@ module IMW
|
|
87
95
|
end
|
88
96
|
end
|
89
97
|
end
|
90
|
-
|
98
|
+
|
99
|
+
# Package the contents of the temporary directory to an archive at
# +output+, returning a +RuntimeError+ instead of raising it.
#
# +output+::  destination, handed straight to <tt>package!</tt>.
# +options+:: options hash, handed unchanged to <tt>package!</tt>.
#
# Returns whatever <tt>package!</tt> returns, or the rescued
# +RuntimeError+ instance if it raised one.  Exceptions of any other
# class still propagate to the caller.
def package output, options={}
  # Forward +options+ itself.  Writing <tt>options={}</tt> at the call
  # site is an assignment expression, which silently replaced the
  # caller's options with an empty hash.
  package! output, options
rescue RuntimeError => e
  e
end
|
108
|
+
|
91
109
|
# Package the contents of the temporary directory to an archive
|
92
110
|
# at +output+.
|
93
111
|
def package! output, options={}
|
@@ -19,6 +19,14 @@ module IMW
|
|
19
19
|
last_response && last_response.response.class == Net::HTTPOK
|
20
20
|
end
|
21
21
|
|
22
|
+
# Non-raising counterpart of <tt>upload!</tt>.
#
# Delegates to <tt>upload!</tt> with the same +local_path+ and
# +remote_path+ and returns its result.  If <tt>upload!</tt> raises a
# +RuntimeError+ (or a subclass), that exception object is returned
# instead of being raised; any other exception class propagates.
def upload local_path, remote_path
  upload! local_path, remote_path
rescue RuntimeError => error
  error
end
|
29
|
+
|
22
30
|
# Store the file at +local_path+ under the key +remote_path+ in the
# bucket named by +bucket_name+, via <tt>AWS::S3::S3Object.store</tt>.
#
# The response is remembered in <tt>@last_response</tt> and is also the
# return value.  Exceptions raised while opening the file or storing it
# are not rescued here.
def upload! local_path, remote_path
  # Block-form File.open closes the handle once the store call returns
  # (or raises); the bare <tt>open(local_path)</tt> used before never
  # closed it.  File.open also treats +local_path+ strictly as a path,
  # whereas Kernel#open would run a command for a path starting with "|".
  File.open(local_path) do |file|
    @last_response = AWS::S3::S3Object.store(remote_path, file, bucket_name)
  end
end
|
@@ -1,305 +1,288 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
# h2. lib/imw/parsers/html_parser/matcher.rb -- utility classes for html parser
|
6
|
-
#
|
7
|
-
# == About
|
8
|
-
#
|
9
|
-
# This file defines the <tt>IMW::HTMLParserMatcher::Matcher</tt>
|
10
|
-
# abstract class and some concrete subclasses which perform specific
|
11
|
-
# kinds of matches against HTML documents using the
|
12
|
-
# Hpricot[https://code.whytheluckystiff.net/hpricot/] library.
|
13
|
-
#
|
14
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
15
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
16
|
-
# License:: GPL 3.0
|
17
|
-
# Website:: http://infinitemonkeywrench.org/
|
18
|
-
#
|
19
|
-
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
20
|
-
|
21
1
|
require 'imw/utils/extensions/hpricot'
|
22
2
|
|
23
3
|
module IMW
|
24
|
-
module
|
4
|
+
module Parsers
|
5
|
+
module HtmlMatchers
|
25
6
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
7
|
+
# Abstract base class for the HTML matchers.
#
# An instance carries three pieces of state:
#
# +selector+:: an HTML path specification; each subclass decides how it
#              is used to collect elements from a document.
# +matcher+::  optional nested matcher (defaults to +nil+) that
#              subclasses apply to whatever they collected.
# +options+::  optional hash of flags (defaults to <tt>{}</tt>).
#
# Subclasses are expected to override +match+.
class Matcher
  attr_accessor :selector, :matcher, :options

  def initialize(selector, matcher = nil, options = {})
    @selector = selector
    @matcher  = matcher
    @options  = options
  end

  # Always raises a +RuntimeError+ naming the receiver's class, since
  # the base class has no matching behaviour of its own.
  def match(doc)
    raise "Abstract class #{self.class}"
  end
end
|
49
|
-
|
50
|
-
end
|
51
32
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
33
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
# that works on the first element of a document matching a selector.
class MatchFirstElement < Matcher
  # Look up the first element of +doc+ matching +selector+ (via
  # <tt>doc.at</tt>).  A +String+ +doc+ is parsed with +Hpricot+ first.
  #
  # Returns +nil+ when nothing matches.  Otherwise returns, in order of
  # precedence: the result of <tt>matcher.match</tt> on the element if a
  # +matcher+ was given; the element itself if <tt>options[:html]</tt>
  # is set; else the element's stripped inner text.
  #
  #   m = MatchFirstElement.new('span#bio/a.homepage')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
  #   # => 'My Homepage'
  def match(doc)
    doc = Hpricot(doc) if doc.is_a?(String)
    element = doc.at(selector)
    return nil unless element
    return matcher.match(element) if matcher
    return element if options[:html]
    element.inner_text.strip
  end
end
|
73
|
-
end
|
74
55
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
56
|
+
# FIXME is there really a need for this separate class?  why can't
# MatchFirstElement.match accept a block?
#
# Variant of +MatchFirstElement+ that post-processes its result with a
# callable.  +proc+ must respond to +call+; +selector+, +matcher+ and
# +options+ mean the same as in the superclass.
class MatchProc < MatchFirstElement
  attr_accessor :proc, :options

  def initialize(selector, proc, matcher = nil, options = {})
    super(selector, matcher)
    @options = options
    @proc    = proc
  end

  # Run the superclass match and pass its result to +proc+.  When that
  # result is +nil+ or +false+, +proc+ is called with +doc+ itself
  # instead.  Returns whatever +proc+ returns.
  def match(doc)
    found = super(doc)
    self.proc.call(found ? found : doc)
  end
end
|
90
71
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
else
|
111
|
-
if options[:html]
|
112
|
-
subdoc.map{|el| el.inner_html }
|
72
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
# that works on every element of a document matching a selector.
class MatchArray < Matcher
  # Collect the elements of +doc+ matching +selector+ (via
  # <tt>doc / selector</tt>) and map each one to a value.  A +String+
  # +doc+ is parsed with +Hpricot+ first.
  #
  # Each element becomes: the result of <tt>matcher.match</tt> if a
  # +matcher+ was given; the element itself if <tt>options[:html]</tt>
  # is set; else its stripped inner text.  Returns +nil+ if the
  # selection itself is +nil+ or +false+.
  #
  #   m = MatchArray.new('span#bio/a.homepage')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>
  #            <span id="bio"><a class="homepage" href="http://foo.baz">Your Homepage</a></span>
  #            <span id="bio"><a class="homepage" href="http://foo.qux">Their Homepage</a></span>')
  #   # => ["My Homepage", "Your Homepage", "Their Homepage"]
  def match(doc)
    doc = Hpricot(doc) if doc.is_a?(String)
    elements = doc / selector
    return nil unless elements
    elements.map do |element|
      if matcher
        matcher.match(element)
      elsif options[:html]
        element
      else
        element.inner_text.strip
      end
    end
  end
end
|
118
|
-
end
|
119
100
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
101
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
# that extracts an attribute value for a selector.
class MatchAttribute < Matcher
  attr_accessor :attribute

  # Takes the +selector+ used to locate the element, the +attribute+ to
  # read (stored as a string via +to_s+) and an optional +matcher+ to
  # apply to the attribute value.
  def initialize(selector, attribute, matcher = nil)
    super(selector, matcher)
    @attribute = attribute.to_s
  end

  # Read +attribute+ for +selector+ from +doc+ using
  # <tt>doc.path_attr</tt>.  A +String+ +doc+ is parsed with +Hpricot+
  # first.  Returns the value as is, or the result of
  # <tt>matcher.match</tt> on it when a +matcher+ was given.
  #
  #   m = MatchAttribute.new('span#bio/a.homepage', 'href')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
  #   # => 'http://foo.bar'
  def match(doc)
    doc = Hpricot(doc) if doc.is_a?(String)
    value = doc.path_attr(selector, attribute)
    return value unless matcher
    matcher.match(value)
  end
end
|
151
|
-
end
|
152
133
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
134
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
# that applies a regular expression to text taken from an HTML
# document.
class MatchRegexp < Matcher
  attr_accessor :re
  attr_accessor :options

  # +selector+:: passed to <tt>doc.contents_of</tt> to pick the text to
  #              search; if +nil+ the whole document is searched.
  # +re+::       the regular expression to match with.
  # +matcher+::  optional matcher applied to the capture(s).
  # +options+::  if it has the key <tt>:capture</tt>, only that group is
  #              returned, indexed as in regular expressions ("1" is the
  #              first capture, "0" the whole match); otherwise an array
  #              of all captures is returned.
  #
  # FIXME Shouldn't the matcher come BEFORE the regexp capture,
  # not after?
  def initialize selector, re, matcher=nil, options={}
    super selector, matcher
    self.options = options
    self.re      = re
  end

  # Match +re+ against the text selected from +doc+ (converted with
  # +to_s+).  A +String+ +doc+ is parsed with +Hpricot+ first.
  #
  # The value is +nil+ when +re+ does not match, the requested group
  # when <tt>options[:capture]</tt> is present, else the array of all
  # captures.  With a +matcher+, the value is passed through
  # <tt>matcher.match</tt> before being returned.
  #
  #   m = MatchRegexp.new('span#bio/a.homepage', /Homepage of (.*)$/, nil, :capture => 1 )
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">Homepage of John Chimpo</a></span>')
  #   # => "John Chimpo"
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    el  = selector ? doc.contents_of(selector) : doc
    m   = re.match(el.to_s)
    val =
      if m.nil?
        nil
      elsif options.key?(:capture)
        # MatchData#[] already uses regexp indexing.  The previous
        # <tt>captures[n - 1]</tt> gave the same answer for n >= 1 but
        # returned the last group, not the whole match, for n == 0.
        m[options[:capture]]
      else
        m.captures
      end
    # pass to matcher, if any
    matcher ? matcher.match(val) : val
  end
end
|
201
|
-
end
|
202
183
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
184
|
+
|
185
|
+
# Matcher that collects every occurrence of a regular expression in
# text taken from an HTML document.
class MatchRegexpRepeatedly < Matcher
  attr_accessor :re

  # +selector+ picks the text to scan (the whole document when +nil+),
  # +re+ is the regular expression, and the optional +matcher+ is
  # applied to the collected matches.
  def initialize(selector, re, matcher = nil)
    super(selector, matcher)
    @re = re
  end

  # Scan the selected part of +doc+ (converted with +to_s+) for +re+.
  # A +String+ +doc+ is parsed with +Hpricot+ first.
  #
  # Returns +nil+ if the selection is +nil+ or +false+.  Otherwise
  # returns the <tt>String#scan</tt> result, flattened when its first
  # entry has length 1, and passed through <tt>matcher.match</tt> when a
  # +matcher+ was given.
  def match(doc)
    doc = Hpricot(doc) if doc.is_a?(String)
    target = selector ? doc.contents_of(selector) : doc
    return unless target
    hits  = target.to_s.scan(re)
    first = hits.first
    # a single capture group yields one-element rows; unwrap them
    hits = hits.flatten if first && first.length == 1
    matcher ? matcher.match(hits) : hits
  end
end
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
204
|
+
|
205
|
+
# Builds a hash of values by running a set of matchers against an
# HTML document.
class MatchHash
  attr_accessor :match_hash

  # +match_hash+ must be a +Hash+ whose values respond to +match+
  # (typically <tt>IMW::Parsers::HtmlMatchers::Matcher</tt> subclasses).
  # Raises a +RuntimeError+ for anything that is not a +Hash+.
  def initialize(match_hash)
    unless match_hash.is_a?(Hash)
      raise "MatchHash requires a hash of :attributes => matchers."
    end
    @match_hash = match_hash
  end

  # Run every matcher in +match_hash+ against +doc+ and collect the
  # results under the matching keys.  A +String+ +doc+ is parsed with
  # +Hpricot+ first.
  #
  # An +Array+ key is paired up with the matcher's result through
  # <tt>Hash.zip</tt> and merged in, minus pairs whose value is +nil+;
  # nothing is merged when the result itself is +nil+ or +false+.  Any
  # other key simply maps to the matcher's result.
  #
  #   m = MatchHash.new({
  #     :name         => MatchFirstElement.new('li/span.customer'),
  #     :order_status => MatchAttribute.new('li/ul[@status]','status'),
  #     :products     => MatchArray.new('li/ul/li')
  #   })
  #   m.match('<li><span class="customer">John Chimpo</span>
  #              <ul status="shipped">
  #                <li>bananas</li>
  #                <li>mangos</li>
  #                <li>banangos</li>
  #              </ul></li>')
  #   # => {
  #     :name         => "John Chimpo",
  #     :order_status => "shipped",
  #     :products     => ["bananas", "mangos", "banangos"]
  #   }
  def match(doc)
    doc = Hpricot(doc) if doc.is_a?(String)
    results = {}
    match_hash.each do |key, sub_matcher|
      value = sub_matcher.match(doc)
      if key.is_a?(Array)
        results.merge!(Hash.zip(key, value).reject { |k, v| v.nil? }) if value
      else
        results[key] = value
      end
    end
    self.class.scrub!(results)
  end

  # Hook intended to kill off keys with nil values; the rejection is
  # commented out, so +hsh+ is currently returned untouched.
  def self.scrub!(hsh)
    hsh # .reject{|k,v| v.nil? }
  end
end
|
238
256
|
|
239
|
-
# Use the +match_hash+ this +MatchHash+ was initialized with to
|
240
|
-
# select elements from +doc+ and extract information from them:
|
241
257
|
#
|
242
|
-
#
|
243
|
-
#
|
244
|
-
|
245
|
-
# :products => MatchArray.new('li/ul/li')
|
246
|
-
# })
|
247
|
-
# m.match('<li><span class="customer">John Chimpo</span>
|
248
|
-
# <ul status="shipped">
|
249
|
-
# <li>bananas</li>
|
250
|
-
# <li>mangos</li>
|
251
|
-
# <li>banangos</li>
|
252
|
-
# </ul></li>')
|
253
|
-
# # => {
|
254
|
-
# :name => "John Chimpo",
|
255
|
-
# :order_status => "shipped",
|
256
|
-
# :products => ["bananas", "mangos", "banangos"]
|
257
|
-
# }
|
258
|
-
def match doc
|
259
|
-
doc = Hpricot(doc) if doc.is_a?(String)
|
258
|
+
# Construct the downstream part of a hash matcher: return a new hash
# with the keys of +spec_hash+, each mapped to the result of
# +build_parse_tree+ on the corresponding spec.
def self.build_match_hash spec_hash
  spec_hash.inject({}) do |built, (attr, spec)|
    built[attr] = build_parse_tree(spec)
    built
  end
end
|
274
|
-
end
|
275
|
-
|
276
|
-
#
|
277
|
-
# construct the downstream part of a hash matcher
|
278
|
-
#
|
279
|
-
def self.build_match_hash spec_hash
|
280
|
-
hsh = { }
|
281
|
-
spec_hash.each do |attr, spec|
|
282
|
-
hsh[attr] = build_parse_tree(spec)
|
283
|
-
end
|
284
|
-
hsh
|
285
|
-
end
|
286
267
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
268
|
+
#
|
269
|
+
# Recursively build a tree of matchers from +spec+:
#
# +nil+::     +nil+
# +Matcher+:: returned unchanged
# +Hash+::    +MatchHash+ over +build_match_hash+ of the spec
# +Array+::   +nil+ if empty; otherwise a +MatchArray+ on the first
#             item (as a string) whose matcher is built from the second
#             item.  More than two items raises a +RuntimeError+.
# +String+::  +MatchFirstElement+ with the string as selector
# +Proc+::    +MatchProc+ with a +nil+ selector
# +Regexp+::  +MatchRegexp+ with a +nil+ selector, returning capture 1
# +Symbol+::  +MatchAttribute+ with a +nil+ selector
#
# Any other spec raises a +RuntimeError+.
def self.build_parse_tree spec
  case spec
  when nil, Matcher
    spec
  when Hash
    MatchHash.new(build_match_hash(spec))
  when Array
    return nil if spec.empty?
    unless spec.length <= 2
      raise "Array spec must be a single selector or a selector and another match specification"
    end
    selector, sub_spec = spec
    MatchArray.new(selector.to_s, build_parse_tree(sub_spec))
  when String
    MatchFirstElement.new(spec)
  when Proc
    MatchProc.new(nil, spec)
  when Regexp
    MatchRegexp.new(nil, spec, nil, :capture => 1)
  when Symbol
    MatchAttribute.new(nil, spec, nil)
  else
    raise "Don't know how to parse #{spec.inspect}"
  end
end
|
304
287
|
end
|
305
288
|
end
|