imw 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +194 -31
- data/VERSION +1 -1
- data/bin/imw +5 -0
- data/lib/imw/boot.rb +0 -15
- data/lib/imw/dataset/paths.rb +38 -0
- data/lib/imw/dataset/task.rb +21 -18
- data/lib/imw/dataset/workflow.rb +126 -65
- data/lib/imw/dataset.rb +56 -82
- data/lib/imw/files/basicfile.rb +3 -3
- data/lib/imw/files/compressed_files_and_archives.rb +23 -37
- data/lib/imw/files/csv.rb +2 -1
- data/lib/imw/files/directory.rb +62 -0
- data/lib/imw/files/excel.rb +84 -0
- data/lib/imw/files/sgml.rb +4 -23
- data/lib/imw/files.rb +62 -47
- data/lib/imw/packagers/archiver.rb +19 -1
- data/lib/imw/packagers/s3_mover.rb +8 -0
- data/lib/imw/parsers/html_parser/matchers.rb +251 -268
- data/lib/imw/parsers/html_parser.rb +181 -176
- data/lib/imw/parsers.rb +1 -1
- data/lib/imw/repository.rb +35 -0
- data/lib/imw/runner.rb +114 -0
- data/lib/imw/utils/extensions/core.rb +0 -16
- data/lib/imw/utils/paths.rb +0 -28
- data/lib/imw.rb +21 -32
- metadata +11 -19
- data/lib/imw/dataset/datamapper/time_and_user_stamps.rb +0 -37
- data/lib/imw/dataset/datamapper.rb +0 -66
- data/lib/imw/dataset/loaddump.rb +0 -50
- data/lib/imw/dataset/old/file_collection.rb +0 -88
- data/lib/imw/dataset/old/file_collection_utils.rb +0 -71
- data/lib/imw/dataset/scaffold.rb +0 -132
- data/lib/imw/dataset/scraped_uri.rb +0 -305
- data/lib/imw/dataset/scrub/old_working_scrubber.rb +0 -87
- data/lib/imw/dataset/scrub/scrub.rb +0 -147
- data/lib/imw/dataset/scrub/scrub_simple_url.rb +0 -38
- data/lib/imw/dataset/scrub/scrub_test.rb +0 -60
- data/lib/imw/dataset/scrub/slug.rb +0 -101
- data/lib/imw/dataset/stats/counter.rb +0 -23
- data/lib/imw/dataset/stats.rb +0 -73
@@ -26,6 +26,10 @@ module IMW
|
|
26
26
|
add_inputs inputs
|
27
27
|
end
|
28
28
|
|
29
|
+
# FIXME Instead of requiring +new_inputs+ to be either an Array
|
30
|
+
# or Hash just iterate through whatever it is using +each+ and
|
31
|
+
# see if each yielded item can be interpreted as a mapping between
|
32
|
+
# strings.
|
29
33
|
def add_inputs new_inputs
|
30
34
|
@inputs ||= {}
|
31
35
|
if new_inputs.is_a?(Array)
|
@@ -70,6 +74,10 @@ module IMW
|
|
70
74
|
@dir ||= File.join(tmp_dir, name.to_s)
|
71
75
|
end
|
72
76
|
|
77
|
+
# FIXME This needs to be made idempotent -- calling prepare
|
78
|
+
# twice should not do any work the second time (unless the user
|
79
|
+
# is insistent and passes a :force option -- or maybe use bang
|
80
|
+
# and not-bang versions of the method for this distinction).
|
73
81
|
def prepare!
|
74
82
|
FileUtils.mkdir_p dir unless File.exist?(dir)
|
75
83
|
inputs.each_pair do |path, basename|
|
@@ -87,7 +95,17 @@ module IMW
|
|
87
95
|
end
|
88
96
|
end
|
89
97
|
end
|
90
|
-
|
98
|
+
|
99
|
+
# Package the contents of the temporary directory to an archive
|
100
|
+
# at +output+ but return exceptions instead of raising them.
|
101
|
+
def package output, options={}
  # Non-raising counterpart of +package!+.  +output+ and +options+
  # are forwarded unchanged.  Returns whatever +package!+ returns, or
  # the RuntimeError it raised (returned, not re-raised).  Exceptions
  # of any other class still propagate to the caller.
  begin
    # Pass the caller's +options+ through.  Writing +options={}+ at
    # the call site would reassign the local to an empty hash and
    # silently discard every option the caller supplied.
    package! output, options
  rescue RuntimeError => e
    return e
  end
end
|
108
|
+
|
91
109
|
# Package the contents of the temporary directory to an archive
|
92
110
|
# at +output+.
|
93
111
|
def package! output, options={}
|
@@ -19,6 +19,14 @@ module IMW
|
|
19
19
|
last_response && last_response.response.class == Net::HTTPOK
|
20
20
|
end
|
21
21
|
|
22
|
+
def upload local_path, remote_path
  # Non-raising counterpart of +upload!+: both arguments are handed
  # straight to +upload!+ and its result is returned.  A RuntimeError
  # raised along the way is returned to the caller instead of
  # propagating.
  #
  # NOTE(review): only RuntimeError (and its subclasses) is caught
  # here; any other exception class still propagates.
  upload! local_path, remote_path
rescue RuntimeError => failure
  failure
end
|
29
|
+
|
22
30
|
def upload! local_path, remote_path
  # Store the contents of the file at +local_path+ under the key
  # +remote_path+ in the bucket named by +bucket_name+.  The value
  # returned by <tt>AWS::S3::S3Object.store</tt> is remembered in
  # <tt>@last_response</tt> and is also this method's return value.
  # Errors raised while opening the file or storing it propagate.
  #
  # The block form of File.open guarantees the handle is closed once
  # the store call finishes (or raises); a bare +open+ left it open
  # until garbage collection.  File.open is also used instead of
  # Kernel#open so the argument is always treated as a file path.
  File.open(local_path) do |file|
    @last_response = AWS::S3::S3Object.store(remote_path, file, bucket_name)
  end
end
|
@@ -1,305 +1,288 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
# h2. lib/imw/parsers/html_parser/matcher.rb -- utility classes for html parser
|
6
|
-
#
|
7
|
-
# == About
|
8
|
-
#
|
9
|
-
# This file defines the <tt>IMW::HTMLParserMatcher::Matcher</tt>
|
10
|
-
# abstract class and some concrete subclasses which perform specific
|
11
|
-
# kinds of matches against HTML documents using the
|
12
|
-
# Hpricot[https://code.whytheluckystiff.net/hpricot/] library.
|
13
|
-
#
|
14
|
-
# Author:: (Philip flip Kromer, Dhruv Bansal) for Infinite Monkeywrench Project (mailto:coders@infochimps.org)
|
15
|
-
# Copyright:: Copyright (c) 2008 infochimps.org
|
16
|
-
# License:: GPL 3.0
|
17
|
-
# Website:: http://infinitemonkeywrench.org/
|
18
|
-
#
|
19
|
-
# puts "#{File.basename(__FILE__)}: Something clever" # at bottom
|
20
|
-
|
21
1
|
require 'imw/utils/extensions/hpricot'
|
22
2
|
|
23
3
|
module IMW
|
24
|
-
module
|
4
|
+
module Parsers
|
5
|
+
module HtmlMatchers
|
25
6
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
7
|
+
# An abstract class from which to subclass specific HTML matchers.
|
8
|
+
#
|
9
|
+
# A subclass is initialized with a +selector+ and an optional
|
10
|
+
# +matcher+. The +selector+ is an HTML path specification used to
|
11
|
+
# collect elements from the document. If initialized with a
|
12
|
+
# +matcher+, the +matcher+ is used to return match information
|
13
|
+
# from the elements; else the inner HTML is returned. Subclasses
|
14
|
+
# decide how the +selector+ will collect elements.
|
15
|
+
class Matcher

  # +selector+:: path specification used to collect elements
  # +matcher+::  optional object applied to what was collected
  # +options+::  free-form hash consulted by subclasses
  attr_accessor :selector, :matcher, :options

  # Remember the +selector+, the optional +matcher+ (+nil+ when not
  # given) and the +options+ hash (empty when not given).
  def initialize selector, matcher=nil, options={}
    self.selector, self.matcher, self.options = selector, matcher, options
  end

  # Placeholder that subclasses are expected to override; calling it
  # on this class always raises a RuntimeError naming the receiver's
  # class.
  def match doc
    raise RuntimeError, "Abstract class #{self.class}"
  end

end
|
49
|
-
|
50
|
-
end
|
51
32
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
33
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
|
34
|
+
# for matching against the first element of a document matching a
|
35
|
+
# selector.
|
36
|
+
class MatchFirstElement < Matcher
  # Look up the first element of +doc+ selected by +selector+ (a
  # String +doc+ is parsed with Hpricot first).
  #
  # Returns +nil+ when nothing is selected.  Otherwise returns, in
  # order of precedence: the +matcher+'s match against the element
  # when a +matcher+ was given; the element itself when
  # <tt>options[:html]</tt> is set; else the element's stripped inner
  # text.
  #
  #   m = MatchFirstElement.new('span#bio/a.homepage')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
  #   # => 'My Homepage'
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    element = doc.at(selector)
    return nil unless element
    return matcher.match(element) if matcher
    return element if options[:html]
    element.inner_text.strip
  end
end
|
73
|
-
end
|
74
55
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
56
|
+
# FIXME is there really a need for this separate class? why can't
|
57
|
+
# MatchFirstElement.match accept a block?
|
58
|
+
class MatchProc < MatchFirstElement
  # +proc+ is the callable applied to the matched value; +options+
  # is the hash consulted by the inherited +match+.
  attr_accessor :proc, :options

  # Takes the +selector+, the callable +proc+, an optional +matcher+
  # and an optional +options+ hash.
  def initialize selector, proc, matcher=nil, options={}
    super(selector, matcher)
    self.proc    = proc
    self.options = options
  end

  # Run the inherited first-element match and call +proc+ on its
  # result.  When that result is +nil+ or +false+, +proc+ is called
  # with +doc+ itself instead.
  def match doc
    found = super(doc)
    self.proc.call(found ? found : doc)
  end
end
|
90
71
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
else
|
111
|
-
if options[:html]
|
112
|
-
subdoc.map{|el| el.inner_html }
|
72
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
|
73
|
+
# for matching each element of a document matching a selector.
|
74
|
+
class MatchArray < Matcher
  # Collect every element of +doc+ selected by +selector+ (a String
  # +doc+ is parsed with Hpricot first) and map each one to a value:
  # the +matcher+'s match against the element when a +matcher+ was
  # given; the element itself when <tt>options[:html]</tt> is set;
  # else the element's stripped inner text.  Returns +nil+ if the
  # selection itself is +nil+ or +false+.
  #
  #   m = MatchArray.new('span#bio/a.homepage')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>
  #            <span id="bio"><a class="homepage" href="http://foo.baz">Your Homepage</a></span>
  #            <span id="bio"><a class="homepage" href="http://foo.qux">Their Homepage</a></span>')
  #   # => ["My Homepage", "Your Homepage", "Their Homepage"]
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    elements = (doc/selector)
    return nil unless elements
    return elements.map { |el| matcher.match(el) } if matcher
    return elements.map { |el| el } if options[:html]
    elements.map { |el| el.inner_text.strip }
  end
end
|
118
|
-
end
|
119
100
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
101
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
|
102
|
+
# for matching an attribute of the first element of a document
|
103
|
+
# matching a selector.
|
104
|
+
class MatchAttribute < Matcher

  # Name of the attribute to read, stored as a String.
  attr_accessor :attribute

  # Takes three arguments rather than the base class's: the
  # +selector+ which collects elements from an HTML document, the
  # +attribute+ to extract (converted with +to_s+), and optionally a
  # +matcher+ applied to the extracted value.
  def initialize selector, attribute, matcher=nil
    super(selector, matcher)
    self.attribute = attribute.to_s
  end

  # Read +attribute+ via <tt>doc.path_attr(selector, attribute)</tt>
  # (a String +doc+ is parsed with Hpricot first; +path_attr+ is
  # presumably supplied by imw/utils/extensions/hpricot -- confirm).
  # Returns the +matcher+'s match against that value when a +matcher+
  # was given, else the value itself.
  #
  #   m = MatchAttribute.new('span#bio/a.homepage', 'href')
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">My Homepage</a></span>')
  #   # => 'http://foo.bar'
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    value = doc.path_attr(selector, attribute)
    if matcher
      matcher.match(value)
    else
      value
    end
  end
end
|
151
|
-
end
|
152
133
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
134
|
+
# Concrete subclass of <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>
|
135
|
+
# for using a regular expression to match against text in an HTML
|
136
|
+
# document.
|
137
|
+
class MatchRegexp < Matcher

  # +re+ is the regular expression applied to the selected text;
  # +options+ may carry <tt>:capture</tt>, the 1-based number of the
  # capture group to return.
  attr_accessor :re, :options

  # Takes the +selector+ (+nil+ means "use the whole document"), the
  # regular expression +re+, an optional +matcher+ applied to the
  # capture(s), and an +options+ hash.
  #
  # FIXME Shouldn't the matcher come BEFORE the regexp capture,
  # not after?
  def initialize selector, re, matcher=nil, options={}
    super(selector, matcher)
    self.re      = re
    self.options = options
  end

  # Apply +re+ to the text of <tt>doc.contents_of(selector)</tt>, or
  # to the text of +doc+ itself when +selector+ is +nil+ (a String
  # +doc+ is parsed with Hpricot first; +contents_of+ is presumably
  # supplied by imw/utils/extensions/hpricot -- confirm).
  #
  # The captured value is +nil+ when +re+ does not match, the single
  # capture group numbered <tt>options[:capture]</tt> when that key
  # is present, else the array of all captures.  That value is passed
  # through +matcher+ when one was given, else returned as is.
  #
  #   m = MatchRegexp.new('span#bio/a.homepage', /Homepage of (.*)$/, nil, :capture => 1 )
  #   m.match('<span id="bio"><a class="homepage" href="http://foo.bar">Homepage of John Chimpo</a></span>')
  #   # => "John Chimpo"
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    target = if selector then doc.contents_of(selector) else doc end
    found  = re.match(target.to_s)
    captured =
      if found.nil?
        nil
      elsif self.options.key?(:capture)
        # group numbers start at 1 but +captures+ is a 0-based array
        found.captures[self.options[:capture] - 1]
      else
        found.captures
      end
    return captured unless matcher
    matcher.match(captured)
  end
end
|
201
|
-
end
|
202
183
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
184
|
+
|
185
|
+
class MatchRegexpRepeatedly < Matcher
  # Regular expression scanned for repeatedly.
  attr_accessor :re

  # Takes the +selector+ (+nil+ means "use the whole document"), the
  # regular expression +re+ and an optional +matcher+.
  def initialize selector, re, matcher=nil
    super(selector, matcher)
    self.re = re
  end

  # Scan the text of <tt>doc.contents_of(selector)</tt> -- or of
  # +doc+ itself when +selector+ is +nil+ -- for every occurrence of
  # +re+ (a String +doc+ is parsed with Hpricot first).  Returns
  # +nil+ when nothing is selected.  The scan result is flattened
  # when its first entry has length 1, then passed through +matcher+
  # when one was given, else returned as is.
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    target = selector ? doc.contents_of(selector) : doc
    return unless target
    hits  = target.to_s.scan(re)
    first = hits.first
    # one capture group per hit: collapse [["a"], ["b"]] to ["a", "b"]
    hits = hits.flatten if first && first.length == 1
    return hits unless matcher
    matcher.match(hits)
  end
end
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
204
|
+
|
205
|
+
# Class for building a hash of values by using appropriate
|
206
|
+
# matchers against an HTML document.
|
207
|
+
class MatchHash

  # Hash mapping result keys to the matcher objects that produce
  # their values.
  attr_accessor :match_hash

  # +match_hash+ must be a +Hash+ whose values respond to +match+
  # (for instance subclasses of
  # <tt>IMW::Parsers::HtmlMatchers::Matcher</tt>); anything that is
  # not a +Hash+ raises a RuntimeError.
  def initialize match_hash
    unless match_hash.is_a?(Hash)
      raise "MatchHash requires a hash of :attributes => matchers."
    end
    self.match_hash = match_hash
  end

  # Run every matcher in +match_hash+ against +doc+ (a String +doc+
  # is parsed with Hpricot first) and collect the results in a hash:
  #
  #   m = MatchHash.new({
  #     :name         => MatchFirstElement.new('li/span.customer'),
  #     :order_status => MatchAttribute.new('li/ul[@status]','status'),
  #     :products     => MatchArray.new('li/ul/li')
  #   })
  #   m.match('<li><span class="customer">John Chimpo</span>
  #            <ul status="shipped">
  #              <li>bananas</li>
  #              <li>mangos</li>
  #              <li>banangos</li>
  #            </ul></li>')
  #   # => {
  #     :name         => "John Chimpo",
  #     :order_status => "shipped",
  #     :products     => ["bananas", "mangos", "banangos"]
  #   }
  #
  # A key that is itself an Array is combined with its matcher's
  # result through <tt>Hash.zip</tt> (not a core method; presumably
  # an IMW extension -- confirm) and the pairs with a non-nil value
  # are merged in; if the matcher returned nothing, no keys are added.
  def match doc
    doc = Hpricot(doc) if doc.is_a?(String)
    result = {}
    match_hash.each do |attr, m|
      val = m.match(doc)
      if attr.is_a?(Array)
        next unless val
        pairs = Hash.zip(attr, val)
        result.merge!(pairs.reject { |_key, value| value.nil? })
      else
        result[attr] = val
      end
    end
    self.class.scrub!(result)
  end

  # Currently hands +hsh+ back untouched: the step that would drop
  # keys with nil values is disabled.
  def self.scrub! hsh
    hsh # .reject{|k,v| v.nil? }
  end
end
|
238
256
|
|
239
|
-
# Use the +match_hash+ this +MatchHash+ was initialized with to
|
240
|
-
# select elements from +doc+ and extract information from them:
|
241
257
|
#
|
242
|
-
#
|
243
|
-
#
|
244
|
-
|
245
|
-
# :products => MatchArray.new('li/ul/li')
|
246
|
-
# })
|
247
|
-
# m.match('<li><span class="customer">John Chimpo</span>
|
248
|
-
# <ul status="shipped">
|
249
|
-
# <li>bananas</li>
|
250
|
-
# <li>mangos</li>
|
251
|
-
# <li>banangos</li>
|
252
|
-
# </ul></li>')
|
253
|
-
# # => {
|
254
|
-
# :name => "John Chimpo",
|
255
|
-
# :order_status => "shipped",
|
256
|
-
# :products => ["bananas", "mangos", "banangos"]
|
257
|
-
# }
|
258
|
-
def match doc
|
259
|
-
doc = Hpricot(doc) if doc.is_a?(String)
|
258
|
+
# construct the downstream part of a hash matcher
|
259
|
+
#
|
260
|
+
def self.build_match_hash spec_hash
  # Translate each value of +spec_hash+ with +build_parse_tree+ and
  # return a new hash holding the results under the original keys.
  tree = {}
  spec_hash.each { |attr, spec| tree[attr] = build_parse_tree(spec) }
  tree
end
|
274
|
-
end
|
275
|
-
|
276
|
-
#
|
277
|
-
# construct the downstream part of a hash matcher
|
278
|
-
#
|
279
|
-
def self.build_match_hash spec_hash
|
280
|
-
hsh = { }
|
281
|
-
spec_hash.each do |attr, spec|
|
282
|
-
hsh[attr] = build_parse_tree(spec)
|
283
|
-
end
|
284
|
-
hsh
|
285
|
-
end
|
286
267
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
268
|
+
#
|
269
|
+
# recursively build a tree of matchers
|
270
|
+
#
|
271
|
+
def self.build_parse_tree spec
  # Turn a match specification into a matcher object, recursing into
  # nested specifications:
  #
  # +nil+::    +nil+
  # Matcher::  returned unchanged
  # Hash::     MatchHash over +build_match_hash+ of the hash
  # Array::    +nil+ when empty; otherwise a MatchArray whose
  #            selector is the first entry (as a String) and whose
  #            matcher is built from the second entry.  More than two
  #            entries raise a RuntimeError.
  # String::   MatchFirstElement using the string as selector
  # Proc::     MatchProc with no selector
  # Regexp::   MatchRegexp with no selector, returning capture 1
  # Symbol::   MatchAttribute with no selector
  #
  # Anything else raises a RuntimeError.
  return nil if spec.nil?
  case spec
  when Matcher
    spec
  when Hash
    MatchHash.new(build_match_hash(spec))
  when Array
    if spec.empty?
      nil
    elsif spec.length > 2
      raise "Array spec must be a single selector or a selector and another match specification"
    else
      selector, sub_spec = spec[0], spec[1]
      MatchArray.new(selector.to_s, build_parse_tree(sub_spec))
    end
  when String
    MatchFirstElement.new(spec)
  when Proc
    MatchProc.new(nil, spec)
  when Regexp
    MatchRegexp.new(nil, spec, nil, :capture => 1)
  when Symbol
    MatchAttribute.new(nil, spec, nil)
  else
    raise "Don't know how to parse #{spec.inspect}"
  end
end
|
304
287
|
end
|
305
288
|
end
|