wombat 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +20 -20
- data/VERSION +1 -1
- data/fixtures/vcr_cassettes/headers_selector.yml +304 -0
- data/lib/wombat/dsl/headers.rb +19 -0
- data/lib/wombat/dsl/metadata.rb +1 -0
- data/lib/wombat/processing/parser.rb +10 -0
- data/lib/wombat/property/locators/factory.rb +3 -0
- data/lib/wombat/property/locators/headers.rb +17 -0
- data/lib/wombat/property/locators/text.rb +3 -2
- data/spec/processing/parser_spec.rb +6 -0
- data/spec/property/locators/headers_spec.rb +23 -0
- data/spec/property/locators/text_spec.rb +11 -1
- data/wombat.gemspec +6 -2
- metadata +6 -2
data/Gemfile.lock
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
activesupport (3.2.
|
4
|
+
activesupport (3.2.9)
|
5
5
|
i18n (~> 0.6)
|
6
6
|
multi_json (~> 1.0)
|
7
7
|
diff-lcs (1.1.3)
|
8
|
-
domain_name (0.5.
|
8
|
+
domain_name (0.5.6)
|
9
9
|
unf (~> 0.0.3)
|
10
10
|
fakeweb (1.3.0)
|
11
11
|
git (1.2.5)
|
12
|
-
i18n (0.6.
|
13
|
-
jeweler (1.8.
|
12
|
+
i18n (0.6.1)
|
13
|
+
jeweler (1.8.4)
|
14
14
|
bundler (~> 1.0)
|
15
15
|
git (>= 1.2.5)
|
16
16
|
rake
|
17
17
|
rdoc
|
18
|
-
json (1.7.
|
18
|
+
json (1.7.5)
|
19
19
|
mechanize (2.5.1)
|
20
20
|
domain_name (~> 0.5, >= 0.5.1)
|
21
21
|
mime-types (~> 1.17, >= 1.17.2)
|
@@ -24,31 +24,31 @@ GEM
|
|
24
24
|
nokogiri (~> 1.4)
|
25
25
|
ntlm-http (~> 0.1, >= 0.1.1)
|
26
26
|
webrobots (~> 0.0, >= 0.0.9)
|
27
|
-
mime-types (1.
|
28
|
-
multi_json (1.
|
27
|
+
mime-types (1.19)
|
28
|
+
multi_json (1.5.0)
|
29
29
|
net-http-digest_auth (1.2.1)
|
30
|
-
net-http-persistent (2.
|
31
|
-
nokogiri (1.5.
|
30
|
+
net-http-persistent (2.8)
|
31
|
+
nokogiri (1.5.5)
|
32
32
|
ntlm-http (0.1.1)
|
33
|
-
rake (0.
|
33
|
+
rake (10.0.3)
|
34
34
|
rdoc (3.12)
|
35
35
|
json (~> 1.4)
|
36
36
|
rest-client (1.6.7)
|
37
37
|
mime-types (>= 1.16)
|
38
|
-
rspec (2.
|
39
|
-
rspec-core (~> 2.
|
40
|
-
rspec-expectations (~> 2.
|
41
|
-
rspec-mocks (~> 2.
|
42
|
-
rspec-core (2.
|
43
|
-
rspec-expectations (2.
|
38
|
+
rspec (2.12.0)
|
39
|
+
rspec-core (~> 2.12.0)
|
40
|
+
rspec-expectations (~> 2.12.0)
|
41
|
+
rspec-mocks (~> 2.12.0)
|
42
|
+
rspec-core (2.12.2)
|
43
|
+
rspec-expectations (2.12.0)
|
44
44
|
diff-lcs (~> 1.1.3)
|
45
|
-
rspec-mocks (2.
|
45
|
+
rspec-mocks (2.12.0)
|
46
46
|
unf (0.0.5)
|
47
47
|
unf_ext
|
48
|
-
unf_ext (0.0.
|
49
|
-
vcr (2.
|
48
|
+
unf_ext (0.0.5)
|
49
|
+
vcr (2.3.0)
|
50
50
|
webrobots (0.0.13)
|
51
|
-
yard (0.8.
|
51
|
+
yard (0.8.3)
|
52
52
|
|
53
53
|
PLATFORMS
|
54
54
|
ruby
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.0.1
|
1
|
+
2.1.0
|
data/fixtures/vcr_cassettes/headers_selector.yml
ADDED
@@ -0,0 +1,304 @@
|
|
1
|
+
---
|
2
|
+
http_interactions:
|
3
|
+
- request:
|
4
|
+
method: get
|
5
|
+
uri: http://www.github.com/
|
6
|
+
body:
|
7
|
+
encoding: US-ASCII
|
8
|
+
string: ''
|
9
|
+
headers:
|
10
|
+
accept:
|
11
|
+
- ! '*/*'
|
12
|
+
user-agent:
|
13
|
+
- Mechanize/2.5.1 Ruby/1.9.3p327 (http://github.com/tenderlove/mechanize/)
|
14
|
+
accept-encoding:
|
15
|
+
- gzip,deflate,identity
|
16
|
+
accept-charset:
|
17
|
+
- ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
18
|
+
accept-language:
|
19
|
+
- en-us,en;q=0.5
|
20
|
+
host:
|
21
|
+
- www.github.com
|
22
|
+
connection:
|
23
|
+
- keep-alive
|
24
|
+
keep-alive:
|
25
|
+
- 300
|
26
|
+
response:
|
27
|
+
status:
|
28
|
+
code: 301
|
29
|
+
message: Moved Permanently
|
30
|
+
headers:
|
31
|
+
server:
|
32
|
+
- nginx
|
33
|
+
date:
|
34
|
+
- Mon, 10 Dec 2012 04:57:04 GMT
|
35
|
+
content-type:
|
36
|
+
- text/html
|
37
|
+
content-length:
|
38
|
+
- '178'
|
39
|
+
connection:
|
40
|
+
- keep-alive
|
41
|
+
location:
|
42
|
+
- http://github.com/
|
43
|
+
body:
|
44
|
+
encoding: US-ASCII
|
45
|
+
string: ! "<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body
|
46
|
+
bgcolor=\"white\">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>nginx</center>\r\n</body>\r\n</html>\r\n"
|
47
|
+
http_version: '1.1'
|
48
|
+
recorded_at: Mon, 10 Dec 2012 04:57:04 GMT
|
49
|
+
- request:
|
50
|
+
method: get
|
51
|
+
uri: http://github.com/
|
52
|
+
body:
|
53
|
+
encoding: US-ASCII
|
54
|
+
string: ''
|
55
|
+
headers:
|
56
|
+
accept:
|
57
|
+
- ! '*/*'
|
58
|
+
user-agent:
|
59
|
+
- Mechanize/2.5.1 Ruby/1.9.3p327 (http://github.com/tenderlove/mechanize/)
|
60
|
+
accept-encoding:
|
61
|
+
- gzip,deflate,identity
|
62
|
+
accept-charset:
|
63
|
+
- ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
64
|
+
accept-language:
|
65
|
+
- en-us,en;q=0.5
|
66
|
+
host:
|
67
|
+
- github.com
|
68
|
+
connection:
|
69
|
+
- keep-alive
|
70
|
+
keep-alive:
|
71
|
+
- 300
|
72
|
+
response:
|
73
|
+
status:
|
74
|
+
code: 301
|
75
|
+
message: Moved Permanently
|
76
|
+
headers:
|
77
|
+
server:
|
78
|
+
- nginx
|
79
|
+
date:
|
80
|
+
- Mon, 10 Dec 2012 04:57:04 GMT
|
81
|
+
content-type:
|
82
|
+
- text/html
|
83
|
+
content-length:
|
84
|
+
- '178'
|
85
|
+
connection:
|
86
|
+
- close
|
87
|
+
location:
|
88
|
+
- https://github.com/
|
89
|
+
body:
|
90
|
+
encoding: US-ASCII
|
91
|
+
string: ! "<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body
|
92
|
+
bgcolor=\"white\">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>nginx</center>\r\n</body>\r\n</html>\r\n"
|
93
|
+
http_version: '1.1'
|
94
|
+
recorded_at: Mon, 10 Dec 2012 04:57:04 GMT
|
95
|
+
- request:
|
96
|
+
method: get
|
97
|
+
uri: https://github.com/
|
98
|
+
body:
|
99
|
+
encoding: US-ASCII
|
100
|
+
string: ''
|
101
|
+
headers:
|
102
|
+
accept:
|
103
|
+
- ! '*/*'
|
104
|
+
user-agent:
|
105
|
+
- Mechanize/2.5.1 Ruby/1.9.3p327 (http://github.com/tenderlove/mechanize/)
|
106
|
+
accept-encoding:
|
107
|
+
- gzip,deflate,identity
|
108
|
+
accept-charset:
|
109
|
+
- ISO-8859-1,utf-8;q=0.7,*;q=0.7
|
110
|
+
accept-language:
|
111
|
+
- en-us,en;q=0.5
|
112
|
+
host:
|
113
|
+
- github.com
|
114
|
+
connection:
|
115
|
+
- keep-alive
|
116
|
+
keep-alive:
|
117
|
+
- 300
|
118
|
+
response:
|
119
|
+
status:
|
120
|
+
code: 200
|
121
|
+
message: !binary |-
|
122
|
+
T0s=
|
123
|
+
headers:
|
124
|
+
!binary "c2VydmVy":
|
125
|
+
- !binary |-
|
126
|
+
bmdpbng=
|
127
|
+
!binary "ZGF0ZQ==":
|
128
|
+
- !binary |-
|
129
|
+
TW9uLCAxMCBEZWMgMjAxMiAwNDo1NzowNCBHTVQ=
|
130
|
+
!binary "Y29udGVudC10eXBl":
|
131
|
+
- !binary |-
|
132
|
+
dGV4dC9odG1sOyBjaGFyc2V0PXV0Zi04
|
133
|
+
!binary "dHJhbnNmZXItZW5jb2Rpbmc=":
|
134
|
+
- !binary |-
|
135
|
+
Y2h1bmtlZA==
|
136
|
+
!binary "Y29ubmVjdGlvbg==":
|
137
|
+
- !binary |-
|
138
|
+
a2VlcC1hbGl2ZQ==
|
139
|
+
!binary "c3RhdHVz":
|
140
|
+
- !binary |-
|
141
|
+
MjAwIE9L
|
142
|
+
!binary "eC1ydW50aW1l":
|
143
|
+
- !binary |-
|
144
|
+
Mjg=
|
145
|
+
!binary "ZXRhZw==":
|
146
|
+
- !binary |-
|
147
|
+
ImI1M2E1MTM3ZDliM2FiYWY0MTgxZWQzOWVlZDMyZDQyIg==
|
148
|
+
!binary "Y2FjaGUtY29udHJvbA==":
|
149
|
+
- !binary |-
|
150
|
+
cHJpdmF0ZSwgbWF4LWFnZT0wLCBtdXN0LXJldmFsaWRhdGU=
|
151
|
+
!binary "eC1mcmFtZS1vcHRpb25z":
|
152
|
+
- !binary |-
|
153
|
+
ZGVueQ==
|
154
|
+
!binary "c3RyaWN0LXRyYW5zcG9ydC1zZWN1cml0eQ==":
|
155
|
+
- !binary |-
|
156
|
+
bWF4LWFnZT0yNTkyMDAw
|
157
|
+
!binary "c2V0LWNvb2tpZQ==":
|
158
|
+
- !binary |-
|
159
|
+
X2doX3Nlc3M9QkFoN0J6b1FYMk56Y21aZmRHOXJaVzRpTVdGR0t6Uk9kREp1
|
160
|
+
ZGxrMU1IUlVOVXcyU0dodldFTkZWa0ZLWm1SdFZFaDVkWFYwYlZoYU55OVJP
|
161
|
+
Rms5T2c5elpYTnphVzl1WDJsa0lpVmxNV1l3WW1RNU5XUmpOMk5tTURRMk9X
|
162
|
+
VmxNMkppTmpaaVlqazVNbVl5WWclM0QlM0QtLTZhMWY3MWRiNDFiMWRmN2E0
|
163
|
+
MmVjMzMyZDI3NWE1MTY4NDlmZTFiYzM7IHBhdGg9LzsgZXhwaXJlcz1TYXQs
|
164
|
+
IDAxLUphbi0yMDIyIDAwOjAwOjAwIEdNVDsgc2VjdXJlOyBIdHRwT25seQ==
|
165
|
+
!binary "Y29udGVudC1lbmNvZGluZw==":
|
166
|
+
- !binary |-
|
167
|
+
Z3ppcA==
|
168
|
+
body:
|
169
|
+
encoding: ASCII-8BIT
|
170
|
+
string: !binary |-
|
171
|
+
H4sIAAAAAAAAA+1czXPbRpa/669o01uRowiESFGS5VCcdRw7diaONZEySXZq
|
172
|
+
SmkCTRIWvoIPWcxUqua0pxz267DXvexh/4G5z212/4v8Jft73Q2gQYAUacmp
|
173
|
+
1FRSFRNCd79+/fp992sM73386sn5N6dP2SwL/NHWUP0wNpwJ7rI4ERPv+qQT
|
174
|
+
TR+hQxY/su1oGncDYYfpfTYZN9/ak/F9NvWyWT5uG4Rmu2i93xltMfw3DETG
|
175
|
+
mTPjSSqyk+08m1gPt80mmtkS3+Xe1Unna+vLx9aTKIh55o190WFOFGYizE46
|
176
|
+
L56eCHcqNFAJOPMyX4w+8bLn+Zj99S/sLHI87rMnkeuF06GtmhUOvhdeskT4
|
177
|
+
J51U8MSZdVg2j8VJh8ex7zmYLArtKBahanVF6iReTG8/uA78DpuBUCcdo0dX
|
178
|
+
vpYznHQUBh1m61VVk0383HMtD4soYNBiU9BZUanrRIEtO1GfbhxO1wBKOAsr
|
179
|
+
i3JnJkFb2EbAiaNUuB2Wet+L9KRzcHR9cFRM2hjR6w3UZE2UG33boGP8Nf5/
|
180
|
+
Z/CP+tdH/eXQB7fEfgDsByuwX4QvOTjkAfY6SA2Wsc49X7wIONiyYlT7jRe6
|
181
|
+
0ZvUytAoqWwy+zIoTyI/Sgwo9yfyP4yVDFz9M6y4S/GVYmSPsLCva7xmT/iV
|
182
|
+
5Cv8I7nTlMdCrniezSBgnuNl84ssuhRgVYWkkyYTK+YJD+RgLcfluGcfDD7P
|
183
|
+
+uHVNwd72fnBZ4fPZ9HXT57+/vGnEzc4fz7P8yz4+p+O7N89/OakBlHPUaIj
|
184
|
+
16MErBAO3h887Iouv+QB97qhyGyeQnmkXUNq1BstRxbv88OjY9fZ7x9zlzt7
|
185
|
+
XOxN9p2H/ODg+LDvuP3j/vhwPO6LrpOmHRYI1+NQBU4iaLlKL2RzX6QzIbJC
|
186
|
+
N2TiOrNl/5qU3BLVvtV/2O85e2PBxUNxeNQ/Pp7sieP+nisG3Bkc9vb3H/aO
|
187
|
+
B+7R0a1w3dKbrRQZSxPnpPMW5J1g98WbKLlMLSHE0WFvfOweiL1DMRnvHQ44
|
188
|
+
55OD/kNnMBCTh6J3uNcTR73ua1BMMaWk4Gt+xRUWndHQVk9aIm6JnOIG69id
|
189
|
+
TA73jg+Pnd6gh6fDvfGBM+gfOfuDw8P+oD/gYiL2B8frI1bK3NAmOwnBgeQM
|
190
|
+
x5E7Z44Pxjvp+NF0KtyLKM/YG88VCURUhFfQxJGbO2Q3WCH1rnfFPPek8yaB
|
191
|
+
4hBJIdAQaSmN5q9+galokJ6JEAB89WOpeS3MaxjC+gCymNwLMcbxYetg4Ks5
|
192
|
+
1YxDmGO1jApqZGGX3YAnlyssVW1OgjX0ginjPsxzYQM1YL01QDeyBtcAKbzp
|
193
|
+
DN3292ChNuVFqdtSOwBtIaJyS0RiE+yro38cXJOO/Q2JzfHgYO/ooBRXtdgN
|
194
|
+
0LRm0RV26J0iq+a4CeWhzbW2rhaxNcz9Yt+yKLZCflXwmO8VDeI6hh2Bl4Q9
|
195
|
+
risqQ3WWnZ6q3kxtHk06tH2PhBNWpgCpHaaVEIs+Z9K5WgJoIniWJyJdjVzV
|
196
|
+
65nub4KrYTYGC6wGpnp8hH4VkKGdwxMupE9LREPgLC7FGMiWUllsRSU+4zzL
|
197
|
+
IOpxAg5NtG6Yr5Cf1JuGedwZneGX5TGbRAmbwAbJ7a52WrJsKaN6EilYq4Bj
|
198
|
+
kV6oYXthA+TQxhprTKXflMvSHdZTS6mXCTaLAuWQmworhrDK6IJa6cHUhFo9
|
199
|
+
lDqqJG9d5SWRQffhrFenzjDNkiicjvq7R4Pebm/vCGZFvTGIGIsIbjKQSDNE
|
200
|
+
A4wk22gtQAx2Dx72dvcHB20gEgGf2suixBNppZttEx0TbbJ20RRqfuY5de2M
|
201
|
+
fQ4YdxwRZ1YRCHW+PH9mPezgNfEZgosiMCFjAfm+UH9f0FjyWLJZhIYp3JMF
|
202
|
+
YsTLWYcWULCj/fq7XCRz/dMZvf4d/UlssruU9WrjE+G6Xmarn85I/W4wnofu
|
203
|
+
PIAGSEGBS/sMjuUlAryRftgA0JhPSf87eeJ3RvTvBmMT7vmpLf/tjL7Ix3MG
|
204
|
+
+f2C/twEyNwOIxe407/wKjYZClf7e1DAdhB2XmbRBFI964ye0F/n0TP6awNo
|
205
|
+
IvF5OLWjDDrlqXp+dX66AQCHX4p4Ftv6F4jgxenzTUBwxA6J+J74wiOa0k8b
|
206
|
+
Bth+VkhdaZwwqhKxzijg4ZwFsF8EoJLIRf70whh+V+ViImxS7kwhQip++a7D
|
207
|
+
Yp87Yhb54JeTzjPEZYxmKCeFOkX4Ezk89jLuI3BGKgQxV+HvVxp5aE4581y3
|
208
|
+
ipIIjQ674n6O0U+hZebZDApnMyBK0DSQzYYiDfFW4+JyXK8x4dCGUhnapHpG
|
209
|
+
C+ZB20tlK6ShKh4rA2MqxUVNb7aRB0cGVu9vxRUT7No4ii5h3EsHs3xXuWdw
|
210
|
+
9W7tS8JGSU8yRbCsJtW+2eD4oHd0eADiwMHPZiedY+lZEmc28M3eeFlGvn2F
|
211
|
+
7rl+VWJ7cBeeb4WtnrJAdm9/b//4cDmGX6mcxOPv4YCZaL70nCRKowmC3tJF
|
212
|
+
v1uyBsUMy+jaOzhcStir4A2vI/z7l1/RmxLbPgbfIROoCZei2oPBXsIDr8dR
|
213
|
+
mnJybkuWhS6c8XdHWWhcgF+OKxJcS3CllAv0NLzFCtnP5LsXn99x+FNxbDHp
|
214
|
+
coSR8VuCcBB97/k+r3GufvWuOEFPuQzb4zrTmt61qeJcJNhDrx5JmO1eKAP1
|
215
|
+
yM+DMGVIgnM8m0FHXZdSN4acGHbOdGZn/RHCOzYUwcje/tt//e1/Mnto4w+k
|
216
|
+
L/q1jjFly5mXwgwyeKuJCIQ/ZxOeZrtMTCZIAsI/2WUw4VniIfAQLoNFSymf
|
217
|
+
QYo8iXyWztNMBEhpCKTaKYBB+pAW4PNxlCCRfiWYK66EH8UBYLFowki9kBh3
|
218
|
+
pVmp25PhPctiNtLgtLKuLyYZsywjSmlZfopkd+gi3mqjwV//gpMJkxD/9+Ps
|
219
|
+
f38cryIHHR6AIrSMsUiRz+FzlkXGkgQsQDZjEXokaZexZ0iH7bJUwJuIc9+H
|
220
|
+
OwHPOs2IqC42J0T8g5yIz+ZRnpQuT5yP4eOVPo3sW7hDiB+veAanR0cxdMBS
|
221
|
+
OSkIPm6mXELpFUk600DXqJt2FbMVnRTL6i4Gn1YbYBJfha6WzBRXhzCVyYan
|
222
|
+
hWC5dMRkoGup0BVSqxd2Sp122WkCRkNMRjSgUDiPq6V/GfpegNiSaEsUq9GB
|
223
|
+
gYvk5lLQfE+zeGWP49EzvG8buGtsJ1hWbRLx5y62Ps0FyxLuXAKnXez1pQcc
|
224
|
+
kcEP/Yi7eHTg5QONK0+82WUyvtNbjQMY6a3+9Of/Lll7mSJIkLoCIYwgvMna
|
225
|
+
KeQpT0gBMJ0FsShspYlb42QlNYykJkCYWwuZ90fnggfGSqEL9g3pi8tN2d8D
|
226
|
+
L5NIwW0vGBD8PyVJ1kE0ha4pRISkAhEP+zxiZ2fP2aUgXxoHd7BEXfYYgewV
|
227
|
+
DhGAu3BTNIqYRAB5DhfM7VKghfToFAmgRX6ucDFcQJ38sTHmtXCy1DZ0DI7T
|
228
|
+
Ri9BeAadg0ig1lILHQyNU2P3gmSmxBgpIJOwXpG/kM4uaIjjxYIdGjTFyaXS
|
229
|
+
etCXM4RmAuwjWUuxmWQmnG0C83HCQ2dG7csUiUigR6BgL9+WXAh6JNMu0MrA
|
230
|
+
3giyVlLKc1cTSmqfoEGpL4TvcZziKgnSmZgGzb4SLMXJK0Iz6EwXypfUAgul
|
231
|
+
Qgs4CSVL4bUqdWrGi0obaJYFC6NTycG7pXIdg/rgPlPHmLqX+9D3kOcr5AAI
|
232
|
+
1xLCgs5dyaN6aU1Cl2teg9IFEdchdVPaX4GEsLRQHzVrHCERuCD3pzyh0z6c
|
233
|
+
sAvmhdLukfZgHk6QkwzhPKOD8ApWEOTwYOZFXxwT+C5UAXbqpz//RwR1HQoG
|
234
|
+
ncoxjp4mjMxegHO0PN6IcyEZaqYaGQlY2bKWbCs60orqlg6WUatfUwvrsxNK
|
235
|
+
uk2iCEGcFeeUkTHUxYzklzwdZBKdS+g32W8BurSjxMUggD7qYRZbMbQw1DRu
|
236
|
+
AWIDJ6nZW6PqljOeoVseEyi4F9VJgdRhbqZrJrDGzDAJrrsyxy91bWf0WKrc
|
237
|
+
XOZ4AACHY6VHdwOAxWT8JmNlGsHB9E/UA3uPB/GHUAwxMe0auKDkAuZeJPC1
|
238
|
+
UmGeI3eKApKnZfN64NKMkpl1UGeUFVcNi0BAKzp20O7ZGnv0uCpNAbFX7hQW
|
239
|
+
F3CndVXEty85HM8bdwtAdOXCUkA6i7AeMIEUZ7yE1oTVU9W+HrAgGlM1hXGI
|
240
|
+
Ve6bamI4XW3w5MY0PyNvC77OzfRGIr475fm0iwNJoIInkT5iX4kxzBf359AU
|
241
|
+
DWxI9BZEDDSH7UPONXGFc0nVQJQQly/Yx3jzCL6ygIMAVqPDqDVopWqLUiAn
|
242
|
+
i7QUyE8Qzz2iuigwZ+jhGDpbBxSQex2NFzj802jMxhFP3EVcNqb1x5GTk7Mk
|
243
|
+
l3YzwWfCj1v3/zkaFpFpJ7WOS0VSh/OxCldB8senL9aDpIlrsKN6tJDPv4Jj
|
244
|
+
6lp0kk4xBDGHKlBDcl82QRxV03pT0XHawh5ogKfUsghk420gP/pm6sOJRR4j
|
245
|
+
nNYpd67fLiLRQv6Fmjfh5uD0LEciHYGzUuaZ4PCIk8aSWqCR3Mwikx86o3M4
|
246
|
+
Mmd4uQYyVOfowIUR1wvrAYhXqmERSp2uMyqukIUg2mlA8EBlIJTMlip+GBcd
|
247
|
+
pDvSGb3nRPH8Q9bf6/XhmsbIv+jiwb3uXv+g30PIl0QBYqd+F0mGuvBSfSP8
|
248
|
+
UgwasRehg1CLsg4EN0X8lUJnIf6qItDy5JiinBWn0VVMKfHRCwrElMsqMvhw
|
249
|
+
+snyQoShdE6ChWhE1CqLZDiVJpAL5WMIDuUqlwDVBKsqEegU2YZVDpDyOqcf
|
250
|
+
cp+0ElY7oMoRtNVcB5pMpjhIDiHLQA9vAYZCCXi8YFD9VIehCwiwyTKaL5Ms
|
251
|
+
RdmNTGDV2u4bDp6uLdqS7hzRDFG0VKgX4Ogkc/IsvcB2UEWh4jAvhEMRc9QO
|
252
|
+
XSBXQlFvgkN1DnsIHC3K4nGkwKl67aSDxB0SMfNHIVxx5TQiAfhbPQFJh5oA
|
253
|
+
HBgg2qq25n452+vUSoWw0GoViFklYp3RAzRSoPa+IokGI9OMDRdV5TQzlKEg
|
254
|
+
vVAmNZtpDzbxEiN7QTG29KOosIqVc5txjOHilkgGcACgoOq502zUYsqfwfCk
|
255
|
+
MqigQHDME+xV6cYqOW94aTdN85umCv0oodAVVR44D0yRpfZj5FW5LEVpm26B
|
256
|
+
mWQ+VBKmPR+nUxQIul0qk1b7v23uv66xBpvKpEWArBrz4Qi8JSFfN1f4Etkp
|
257
|
+
pC98pGcoQ0zmrn1tpMHLoOQmUl7eOBHF8m0k3GgaRyYSI2w+flhU+0tGCk00
|
258
|
+
ZIxNfOOBiLdGAJpp0alHPRax5JdffEa5Ng/q3eEQZRSoy1R70D4r3ppqaE3O
|
259
|
+
gXLJGKRdnWajzpdUzk1sdGrku39lpkJLZKP12Uemm9v3cSP2VT6ENhHQ+Rnx
|
260
|
+
B7xOVB799ON/aiPd3kkeZKByJqPKGeVVkCh8oCRhCe+f5WNSIMT96PFLXkA6
|
261
|
+
83CKRCpiIbmBUIrSoavX0CJMaVfKij6Xau1QHNyVnYxk8voyZprGlvMCqPEX
|
262
|
+
xD0waZTSUx5YI2tNJ1l1i0t8VdUTa8NRt7lrsx51zEYLtoBeukjIr7QG6CQz
|
263
|
+
IFW2aC2LoCZcsAmtE9atwq2mu64zj5ruHCXfyGWX9s6UgVvNdqPyUPNL69NQ
|
264
|
+
H7eauV0/vBslohaxXI38MhfSqkzUUlaok/paWjSG4dkVRqQhoaR0zFBqPQ+K
|
265
|
+
cMtGyPeZ2k8h/ARxA+X8Fy3QrSgPE7J0JhyoCGo3c8RvrwO8tpk+otw8+Urh
|
266
|
+
OCKpvZup8pVTaR18R3OhNKFJQBWiyJ1KoXFUUf2yCRV/6YCcXEGKxMqDZlP3
|
267
|
+
370pYh+jYlXnA381Sk0reIdG6Q7NhOKOeu1NFWvK/NXqqs67ZKTPRUYXzNgn
|
268
|
+
VFlRBadNn6WlPqrZaSGZQJpvAz+jZhDH3A2iEDcRfvrnfzEd5SJmBLJNyT1D
|
269
|
+
PSWyc5R6uzN1tAyrf2vDqlUha6xkzvCdo/WvbWi1enAarTsUkmWk+vc2nFrd
|
270
|
+
WI3TYjLjVkaSaG4EI8oca3/yyks9lcuj3Ke8OCPtJiWullizm8WXZrhTh0L5
|
271
|
+
Qcuo2yoeasgqIaGCDyo/oIq7OxWY1di2io0askp4TGzvVpBWo9sqTmrIKqEy
|
272
|
+
0b1DAVuNa6uYqSGrhM3EdbXgtbF+GZ3/PIZLztKQrk3C7o3SbpiMEuKqskce
|
273
|
+
4X6U4OsCyC0vVPZsYudaFZIs4EOEoGphJzhvhzENcaq1XBNtaF5bxevTPECO
|
274
|
+
PEI6Mawlxm6lcd+0adwz1PGiZFOV3tkZr2XibzXdQi5XKfhNsrn12RWX/0we
|
275
|
+
/e35eaHwtOBPFBxQqlynq94q7/N3k6DAB2VwgtfGlE98fLelkd2/FTcu5D4V
|
276
|
+
N6LsD4nbluOLW00VtS3pFSWYGZW5rjHhz8/rUKefR5mH2w5FVc2vkesvJnIN
|
277
|
+
jZ1Zbnfa3ICFw1Li+oYLrArd3zarpu4eFOGfcmu82sFhqx2geh/GqVSDo2pq
|
278
|
+
mVNP+G4QqarZ6USwEVi8xL0deQiwarrVFGz1pLSkVucOVL1Q1DlZdMC9Uc2C
|
279
|
+
TFOhcKAoh2JPEPxkKFJA6TBcm5aKAmqXX+ixqBwUyoXKber7jG+E6N1d2P2q
|
280
|
+
2AXCjhs8uInOzlE8UTsNj0fP5bdFYLLKm0pDfP9qtHWfndMpvro59Z6ffTjr
|
281
|
+
vTfNPmRwIrbuNxv7RuN9/NcYfVh0wEwEX1rHYYwimOtMncIaOCgUdiQG9JEB
|
282
|
+
VEYgchojlUu3lp2drQvZJN9yP42qpoutnZZhY1yI3sGolmHUdHGxtbXzDW5u
|
283
|
+
7Ozg+Hlnh2zGmKq8odaDHVS2lBibursubDVyf4aqg5oXgIV+iUoWuqAAiVgg
|
284
|
+
9Q57QRfMelv6oQ/iFI/ceEZJVI10rxbhKbL1ugW8vn7qb+3rp32iu4a9T7DL
|
285
|
+
P0zga67ypZc6AtfgQhFRdTTZFOXzYLXyG2FNrrr3B13K9xluIP/xga0/dEP3
|
286
|
+
kene4ftbilEfsXt/eOwrfv3jA3zw4H1zE4ht6PJmg2H0F/2qWjIUpNN1d7C+
|
287
|
+
59zb0nP/8UGjH4p7JGUBGJ9OcS6/y1Hw3gD/OGW/xW16gaJXnI6m3HMfbW2N
|
288
|
+
8Nc2SpR81MOh+oWi+0lOX33BfQI00t8ATRWtJEtU3B/jmKPbylQl3U1RXjwX
|
289
|
+
LW/kPL1GRTg+D0T3FKoKS7ULS3VBPDqbo2zrms2QGfMpO0ZYy+t+tbK16oN9
|
290
|
+
a5V81j8MyBqVn/g+FsftHnwP6WIMhsE990+evVQVVQvC8O2331bfz9qa5KEq
|
291
|
+
tZng8tD8sY+ivAcA9D77E1jNm1TPjP1DV91nv37wJ0jmo20qQdv+4X30+2Hr
|
292
|
+
hy2A3VCM49GrBJflEA5i6+TNRnktboC7O7g332Tt55BtqS7ZKT6IIO+PortQ
|
293
|
+
m7RFFKZrBGmT+mAiEhtcSKRrEY+0DHkTBs+AirWKNwz2FIyFOsqECio20kkQ
|
294
|
+
SHVvRSJFxei6rqG5jhdUvoUP1NFNNtSi5bj7ksNf5+HWt2QGuOsmpMq/ZSiD
|
295
|
+
kle/4PrSvRpckuWyKLPQ7hU3V0cz6qmwq3KpklXJsPLX/NoSSRIlVoArc+ob
|
296
|
+
g7rmFD8zJj8donpU2rZWyYmKXV3JWTyhXBAlI6QA6AqdWcrJ8PVM+siM5H9a
|
297
|
+
xxv6xI4ShQzXzOVFPwh6l53iq2KgQIYPHvEpqoK7hZorvjhVFRMGLRjgzjEK
|
298
|
+
BSxHhj3GIlG6FuDAi7DSNzpNf0POoQgklwgKbcvC1+QC6iRGbby4yLxAbOMG
|
299
|
+
Wcbx6UV8q3EbpbX7/f1j/Y7uYp1so8B2u7ZwAB3a9Gk3+uKF+jzq/wPBEivM
|
300
|
+
NlUAAA==
|
301
|
+
http_version: !binary |-
|
302
|
+
MS4x
|
303
|
+
recorded_at: Mon, 10 Dec 2012 04:57:04 GMT
|
304
|
+
recorded_with: VCR 2.3.0
|
data/lib/wombat/dsl/headers.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
module Wombat
|
2
|
+
module DSL
|
3
|
+
class Headers < PropertyGroup
|
4
|
+
attr_accessor :wombat_property_selector
|
5
|
+
|
6
|
+
def initialize(name, selector)
|
7
|
+
@wombat_property_selector = selector
|
8
|
+
|
9
|
+
super(name)
|
10
|
+
end
|
11
|
+
|
12
|
+
# So that Property::Locators::Headers can identify this class
|
13
|
+
# as an headers property.
|
14
|
+
def wombat_property_format
|
15
|
+
:headers
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/wombat/dsl/metadata.rb
CHANGED
data/lib/wombat/processing/parser.rb
CHANGED
@@ -4,6 +4,14 @@ require 'wombat/processing/node_selector'
|
|
4
4
|
require 'mechanize'
|
5
5
|
require 'restclient'
|
6
6
|
|
7
|
+
module Nokogiri
|
8
|
+
module XML
|
9
|
+
class Document
|
10
|
+
attr_accessor :headers
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
7
15
|
module Wombat
|
8
16
|
module Processing
|
9
17
|
module Parser
|
@@ -32,9 +40,11 @@ module Wombat
|
|
32
40
|
if metadata[:document_format] == :html
|
33
41
|
@page = @mechanize.get(url)
|
34
42
|
parser = @page.parser
|
43
|
+
parser.headers = @page.header
|
35
44
|
else
|
36
45
|
@page = RestClient.get(url)
|
37
46
|
parser = Nokogiri::XML @page
|
47
|
+
parser.headers = @page.headers
|
38
48
|
end
|
39
49
|
@response_code = @page.code.to_i if @page.respond_to? :code
|
40
50
|
parser
|
data/lib/wombat/property/locators/factory.rb
CHANGED
@@ -6,6 +6,7 @@ require 'wombat/property/locators/iterator'
|
|
6
6
|
require 'wombat/property/locators/property_group'
|
7
7
|
require 'wombat/property/locators/list'
|
8
8
|
require 'wombat/property/locators/text'
|
9
|
+
require 'wombat/property/locators/headers'
|
9
10
|
|
10
11
|
class Wombat::Property::Locators::UnknownTypeException < Exception; end;
|
11
12
|
|
@@ -27,6 +28,8 @@ module Wombat
|
|
27
28
|
PropertyGroup
|
28
29
|
when :follow
|
29
30
|
Follow
|
31
|
+
when :headers
|
32
|
+
Headers
|
30
33
|
else
|
31
34
|
raise Wombat::Property::Locators::UnknownTypeException.new("Unknown property format #{property.format}.")
|
32
35
|
end
|
data/lib/wombat/property/locators/headers.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#coding: utf-8
|
2
|
+
|
3
|
+
module Wombat
|
4
|
+
module Property
|
5
|
+
module Locators
|
6
|
+
class Headers < Base
|
7
|
+
def locate(context, page = nil)
|
8
|
+
super do
|
9
|
+
context.headers.select do |k, v|
|
10
|
+
k.to_s.match(@property.wombat_property_selector)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/spec/processing/parser_spec.rb
CHANGED
@@ -13,7 +13,10 @@ describe Wombat::Processing::Parser do
|
|
13
13
|
@metadata.path "/search"
|
14
14
|
fake_document = double :document
|
15
15
|
fake_parser = double :parser
|
16
|
+
fake_header = double :header
|
16
17
|
fake_document.should_receive(:parser).and_return(fake_parser)
|
18
|
+
fake_document.should_receive(:header).and_return(fake_header)
|
19
|
+
fake_parser.should_receive(:headers=)
|
17
20
|
@parser.mechanize.should_receive(:get).with("http://www.google.com/search").and_return fake_document
|
18
21
|
|
19
22
|
@parser.parse @metadata
|
@@ -22,10 +25,13 @@ describe Wombat::Processing::Parser do
|
|
22
25
|
it 'should correctly parse xml documents' do
|
23
26
|
fake_document = double :xml
|
24
27
|
fake_parser = double :parser
|
28
|
+
fake_headers = double :headers
|
25
29
|
@metadata.document_format :xml
|
26
30
|
@parser.mechanize.should_not_receive(:get)
|
27
31
|
RestClient.should_receive(:get).and_return fake_document
|
28
32
|
Nokogiri.should_receive(:XML).with(fake_document).and_return fake_parser
|
33
|
+
fake_document.should_receive(:headers).and_return(fake_headers)
|
34
|
+
fake_parser.should_receive(:headers=)
|
29
35
|
|
30
36
|
@parser.parse @metadata
|
31
37
|
end
|
data/spec/property/locators/headers_spec.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Wombat::Property::Locators::Headers do
|
4
|
+
it 'should fetch a list of HTTPResponse headers filtered by a regexp' do
|
5
|
+
VCR.use_cassette('headers_selector') do
|
6
|
+
regex = "^s.*" # filter headers like: server, status or set-cookie
|
7
|
+
|
8
|
+
result = Wombat.crawl do
|
9
|
+
base_url "http://www.github.com"
|
10
|
+
path "/"
|
11
|
+
|
12
|
+
headers regex, :headers
|
13
|
+
end
|
14
|
+
|
15
|
+
result.should_not be_nil
|
16
|
+
result['headers'].size.should >= 1
|
17
|
+
result['headers'].should_not be_nil
|
18
|
+
result['headers'].each do |key, value|
|
19
|
+
key.match(regex).should_not be_nil
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/spec/property/locators/text_spec.rb
CHANGED
@@ -11,7 +11,17 @@ describe Wombat::Property::Locators::Text do
|
|
11
11
|
locator = Wombat::Property::Locators::Text.new(property)
|
12
12
|
|
13
13
|
locator.locate(context).should == { "data1" => "Something cool" }
|
14
|
-
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'should locate text property with xpath selector using xpath functions' do
|
17
|
+
context = double :context
|
18
|
+
context.stub(:xpath).with('concat(/abc, /def)', nil).and_return " Something "
|
19
|
+
property = Wombat::DSL::Property.new('data1', 'xpath=concat(/abc, /def)', :text)
|
20
|
+
|
21
|
+
locator = Wombat::Property::Locators::Text.new(property)
|
22
|
+
|
23
|
+
locator.locate(context).should == { "data1" => "Something" }
|
24
|
+
end
|
15
25
|
|
16
26
|
it 'should locate text property with css selector' do
|
17
27
|
fake_elem = double :element
|
data/wombat.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "wombat"
|
8
|
-
s.version = "2.0.1"
|
8
|
+
s.version = "2.1.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Felipe Lima"]
|
12
|
-
s.date = "2012-
|
12
|
+
s.date = "2012-12-15"
|
13
13
|
s.description = "Generic Web crawler with a DSL that parses structured data from web pages"
|
14
14
|
s.email = "felipe.lima@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -36,10 +36,12 @@ Gem::Specification.new do |s|
|
|
36
36
|
"fixtures/vcr_cassettes/error_page.yml",
|
37
37
|
"fixtures/vcr_cassettes/follow_links.yml",
|
38
38
|
"fixtures/vcr_cassettes/for_each_page.yml",
|
39
|
+
"fixtures/vcr_cassettes/headers_selector.yml",
|
39
40
|
"fixtures/vcr_cassettes/xml_with_namespace.yml",
|
40
41
|
"lib/wombat.rb",
|
41
42
|
"lib/wombat/crawler.rb",
|
42
43
|
"lib/wombat/dsl/follower.rb",
|
44
|
+
"lib/wombat/dsl/headers.rb",
|
43
45
|
"lib/wombat/dsl/iterator.rb",
|
44
46
|
"lib/wombat/dsl/metadata.rb",
|
45
47
|
"lib/wombat/dsl/property.rb",
|
@@ -49,6 +51,7 @@ Gem::Specification.new do |s|
|
|
49
51
|
"lib/wombat/property/locators/base.rb",
|
50
52
|
"lib/wombat/property/locators/factory.rb",
|
51
53
|
"lib/wombat/property/locators/follow.rb",
|
54
|
+
"lib/wombat/property/locators/headers.rb",
|
52
55
|
"lib/wombat/property/locators/html.rb",
|
53
56
|
"lib/wombat/property/locators/iterator.rb",
|
54
57
|
"lib/wombat/property/locators/list.rb",
|
@@ -61,6 +64,7 @@ Gem::Specification.new do |s|
|
|
61
64
|
"spec/processing/parser_spec.rb",
|
62
65
|
"spec/property/locators/factory_spec.rb",
|
63
66
|
"spec/property/locators/follow_spec.rb",
|
67
|
+
"spec/property/locators/headers_spec.rb",
|
64
68
|
"spec/property/locators/html_spec.rb",
|
65
69
|
"spec/property/locators/iterator_spec.rb",
|
66
70
|
"spec/property/locators/list_spec.rb",
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: wombat
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-
|
12
|
+
date: 2012-12-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
@@ -198,10 +198,12 @@ files:
|
|
198
198
|
- fixtures/vcr_cassettes/error_page.yml
|
199
199
|
- fixtures/vcr_cassettes/follow_links.yml
|
200
200
|
- fixtures/vcr_cassettes/for_each_page.yml
|
201
|
+
- fixtures/vcr_cassettes/headers_selector.yml
|
201
202
|
- fixtures/vcr_cassettes/xml_with_namespace.yml
|
202
203
|
- lib/wombat.rb
|
203
204
|
- lib/wombat/crawler.rb
|
204
205
|
- lib/wombat/dsl/follower.rb
|
206
|
+
- lib/wombat/dsl/headers.rb
|
205
207
|
- lib/wombat/dsl/iterator.rb
|
206
208
|
- lib/wombat/dsl/metadata.rb
|
207
209
|
- lib/wombat/dsl/property.rb
|
@@ -211,6 +213,7 @@ files:
|
|
211
213
|
- lib/wombat/property/locators/base.rb
|
212
214
|
- lib/wombat/property/locators/factory.rb
|
213
215
|
- lib/wombat/property/locators/follow.rb
|
216
|
+
- lib/wombat/property/locators/headers.rb
|
214
217
|
- lib/wombat/property/locators/html.rb
|
215
218
|
- lib/wombat/property/locators/iterator.rb
|
216
219
|
- lib/wombat/property/locators/list.rb
|
@@ -223,6 +226,7 @@ files:
|
|
223
226
|
- spec/processing/parser_spec.rb
|
224
227
|
- spec/property/locators/factory_spec.rb
|
225
228
|
- spec/property/locators/follow_spec.rb
|
229
|
+
- spec/property/locators/headers_spec.rb
|
226
230
|
- spec/property/locators/html_spec.rb
|
227
231
|
- spec/property/locators/iterator_spec.rb
|
228
232
|
- spec/property/locators/list_spec.rb
|