wombat 0.2.2 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Wombat
2
2
 
3
- [![CI Build Status](https://secure.travis-ci.org/intridea/omniauth.png?branch=master)](http://travis-ci.org/felipecsl/wombat)
3
+ [![CI Build Status](https://secure.travis-ci.org/felipecsl/wombat.png?branch=master)](http://travis-ci.org/felipecsl/wombat)
4
4
 
5
5
  Generic Web crawler with a DSL that parses structured data from web pages.
6
6
 
@@ -10,6 +10,8 @@ Generic Web crawler with a DSL that parses structured data from web pages.
10
10
 
11
11
  Creating a crawler:
12
12
 
13
+ ###### Create a class that includes ``Wombat::Crawler``:
14
+
13
15
  ```ruby
14
16
 
15
17
  # => github_crawler.rb
@@ -40,25 +42,28 @@ class GithubCrawler
40
42
  end
41
43
  ```
42
44
 
43
- Running it:
45
+ ###### Run it by calling the instance method ``crawl``:
44
46
 
45
47
  ```ruby
46
- irb> GithubCrawler.new.crawl
47
- =>
48
- {
49
- "headline" => "1,316,633 people hosting over 3,951,378 git repositories",
50
- "what_is" => "GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
51
- "explore" => "LOVE GitHub",
52
- "benefits" => {
53
- "first_benefit" => "Team management",
54
- "second_benefit" => "Code review",
55
- "third_benefit" => "Reliable code hosting",
56
- "fourth_benefit" => "Open source collaboration"
57
- }
58
- }
48
+ my_crawler = GithubCrawler.new
49
+ my_crawler.crawl
50
+
51
+ #=> the line above outputs:
52
+
53
+ {
54
+ "headline" => "1,316,633 people hosting over 3,951,378 git repositories",
55
+ "what_is" => "GitHub is the best way to collaborate with others. Fork, send pull requests and manage all your <strong>public</strong> and <strong>private</strong> git repositories.",
56
+ "explore" => "LOVE GitHub",
57
+ "benefits" => {
58
+ "first_benefit" => "Team management",
59
+ "second_benefit" => "Code review",
60
+ "third_benefit" => "Reliable code hosting",
61
+ "fourth_benefit" => "Open source collaboration"
62
+ }
63
+ }
59
64
  ```
60
65
 
61
- ### More advanced constructs like loops, following links, callbacks, etc. to be added/documented soon.
66
+ For more documentation, please see the [wiki](http://github.com/felipecsl/wombat/wiki)
62
67
 
63
68
 
64
69
  ## Contributing to Wombat
@@ -73,5 +78,5 @@ irb> GithubCrawler.new.crawl
73
78
 
74
79
  ## Copyright
75
80
 
76
- Copyright (c) 2011 Felipe Lima. See LICENSE.txt for further details.
81
+ Copyright (c) 2012 Felipe Lima. See LICENSE.txt for further details.
77
82
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.2
1
+ 0.2.3
data/fixtures/vcr_cassettes/for_each_page.yml ADDED
@@ -0,0 +1,254 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: https://www.github.com/explore
6
+ body: ''
7
+ headers:
8
+ accept:
9
+ - ! '*/*'
10
+ user-agent:
11
+ - Mechanize/2.1 Ruby/1.9.3p0 (http://github.com/tenderlove/mechanize/)
12
+ accept-encoding:
13
+ - gzip,deflate,identity
14
+ accept-charset:
15
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
16
+ accept-language:
17
+ - en-us,en;q=0.5
18
+ host:
19
+ - www.github.com
20
+ connection:
21
+ - keep-alive
22
+ keep-alive:
23
+ - 300
24
+ response:
25
+ status:
26
+ code: 301
27
+ message: Moved Permanently
28
+ headers:
29
+ server:
30
+ - nginx/1.0.12
31
+ date:
32
+ - Tue, 14 Feb 2012 08:26:09 GMT
33
+ content-type:
34
+ - text/html
35
+ content-length:
36
+ - '185'
37
+ connection:
38
+ - keep-alive
39
+ location:
40
+ - https://github.com/explore
41
+ body: ! "<html>\r\n<head><title>301 Moved Permanently</title></head>\r\n<body
42
+ bgcolor=\"white\">\r\n<center><h1>301 Moved Permanently</h1></center>\r\n<hr><center>nginx/1.0.12</center>\r\n</body>\r\n</html>\r\n"
43
+ http_version: '1.1'
44
+ recorded_at: Tue, 14 Feb 2012 08:26:09 GMT
45
+ - request:
46
+ method: get
47
+ uri: https://github.com/explore
48
+ body: ''
49
+ headers:
50
+ accept:
51
+ - ! '*/*'
52
+ user-agent:
53
+ - Mechanize/2.1 Ruby/1.9.3p0 (http://github.com/tenderlove/mechanize/)
54
+ accept-encoding:
55
+ - gzip,deflate,identity
56
+ accept-charset:
57
+ - ISO-8859-1,utf-8;q=0.7,*;q=0.7
58
+ accept-language:
59
+ - en-us,en;q=0.5
60
+ host:
61
+ - github.com
62
+ connection:
63
+ - keep-alive
64
+ keep-alive:
65
+ - 300
66
+ response:
67
+ status:
68
+ code: 200
69
+ message: OK
70
+ headers:
71
+ server:
72
+ - nginx/1.0.12
73
+ date:
74
+ - Tue, 14 Feb 2012 08:26:10 GMT
75
+ content-type:
76
+ - text/html; charset=utf-8
77
+ transfer-encoding:
78
+ - chunked
79
+ connection:
80
+ - keep-alive
81
+ status:
82
+ - 200 OK
83
+ etag:
84
+ - ! '"0f503f5e8996278ddcd2f781d802ed60"'
85
+ x-frame-options:
86
+ - deny
87
+ x-runtime:
88
+ - '186'
89
+ set-cookie:
90
+ - _gh_sess=BAh7BzoQX2NzcmZfdG9rZW4iMUV1OXhZOXRablNvWWZVRlUzZy9PL3pwa002cjVJOUN0cEg2ZjZxS2puVFU9Og9zZXNzaW9uX2lkIiUyYzIwYTRiZTAwYzViNWRmZjUxYjk4MjRlOWFmM2IwZg%3D%3D--d7f3dfe98a221c6f0b0488b0f4e3b15a9a50da5f;
91
+ path=/; expires=Sat, 01-Jan-2022 00:00:00 GMT; secure; HttpOnly
92
+ cache-control:
93
+ - private, max-age=0, must-revalidate
94
+ strict-transport-security:
95
+ - max-age=2592000
96
+ content-encoding:
97
+ - gzip
98
+ body: !binary |-
99
+ H4sIAAAAAAAAA+1dWXfbRpZ+168o0zNW4jZIAqS4yBJ75C2WY8UaS44nyclR
100
+ QKBIwsIWAJTE9KQf53mWtzln/sb8gfkn/Uvmu1WFjQBpyqK6X9rHhwSBQi33
101
+ 3rr7LR08ePHu+fkPpy/ZLPHc0c6B/GLsYMZNe7TD8O/A44nJrJkZxTw53J0n
102
+ E22wW3w0S5JQ47/OnavDxr9oH46054EXmokzdnmDWYGfcD85bBy/POT2lDfk
103
+ m6LjxElcPnp5E7pBxJnGvnGS1/PxQUvel4O7jn/JIu4eNmJuRtaswZJFyA8b
104
+ Zhi6joVRAr8VhNyXT20eW5ET0t0/3Hhug80iPjlsFFo0xW0xwmFDDthgLbWc
105
+ fLCJO3dszcHs0z5olfF+qzV1ktl83LQCryUaUZtm6E/rOhVLkB9FUKYgMefJ
106
+ DLBxLCdZXCTBJcdgvulhYlYcTbTQjExPTE6hIH3v5Xx488Mw+dE/C36YfHj1
107
+ oTNtvWv9Fl6e9KK94+HzJHzdm/R+/faTf/7hsNSjGgM9FoArQZQuzzS6gyZv
108
+ mpemZzpNnyctMwbe42Zh3XGycHk847jdGs99Gz8UWLTOwO5Yg0FP550BH/bG
109
+ 6G3Ch22ja00Gk063w9vDnt0ed5tWHDeYx23HBGatiNPaJZqzzlNUJ/wmaYn2
110
+ JTxtc96GZuvtLp+M93qYomEPjZ6l98YDa9hr872xZfM9zo2JPjDuNHEFdkmj
111
+ LI6sw8Zt4P7JvDLluzncP/0659FC0yeTbr+nm217OOj1zGF30DF0bloDe9gb
112
+ 2uOObenGEChpfgIY5Q4SYM27bIwOWrJztRm2OUtJPFp3wPV+p6+3+cAY8LEx
113
+ 6Jg9Y9wH1Cd2GwvoG9awb/V6m89SQhTUfNCSHItuHIwDe8EsF5R72HCD6ZTb
114
+ F8E8oc3I/SstjAJ7bhGPYIob7ahuxK7A+7ZzxRwbyAET5BGYmOwqieYc1+BD
115
+ E+emyMiovWpD/M50fB7VNUTX4KSys9hJuIbJBasZTGEIMa0HmvaTM2HHL3/O
116
+ mah44HhTZrpgsilDU2MosMtRbk1ujmdOsbM9QIt2uARGizq76hPH+6PeMTqD
117
+ gdHX+9nWlADcbEraLLgi6G51YrLTdHrdjrHX6feq03vwE/dtZ/Kzpt0elFr3
118
+ BjjjznQGiHfaW17AP3VvtgLc7k0K3/uc6ibgPmiZSuCk1CG/iZ7VjvPMS87i
119
+ ObSAJGDzmLPJ3HUX7Ne56ToTh9vsw/u3MWCOBrHjW2g2c2LIyiv1Pn7gLZth
120
+ S/MoCiIWEunSzwAiNmJ2AGnmx6J1ivKDuZtt7CDU0BnLeUVxd7tO2i6MIKsh
121
+ 6kfYxmXxU5COoWv6cWN05kz9echM32an8jWCw0HLdQoaUN41l1rQ+q6zRqnO
122
+ lGpM5Y4P8m4n3EwAVsxn3ZTzVq9U+7VzHQNO6zuULZ6h3XJHhbnhqeOv70c0
123
+ +WPEsQYfGtLhPxqvMhi8pWfl7g9acyixiptLIlMEQgICXDoFvfqhHkruLzhp
124
+ gZMTg17BxOv4/RKzplkUeyOCJA4qmh3M9EztTVGIW/SkQJTmOGbhJ1NKGsAt
125
+ R2ErBUImTLjLrYTb4EtgS65kTarRBZF+Ix2vCLBynxEPA6w5iByQS10/okFj
126
+ 9L7QbnVv2ATTOW3C2q7yp6O3acPVfSWOx6Gaw5qoWV32cHSumq3uKDUhCt1Y
127
+ gc0v1P3RmTAx8vcVPZXwmCIEyNpJyUh+Ez9jrWaKacFoSjRgBe7c82OmEIOf
128
+ 2bUkiwJdSaQJAi0On0Qkt/ypVsKWpCkjY2dpIyI4wa9EP+fqNhMYlF3Hoeln
129
+ rwF+wK/aFOeBbS7Yv6pfGfdISa91zfllY/QRnwSvNQ09aESzxuiEvj7TdAI7
130
+ kFSC0St5Qc3lPFs0UbHM1swQ30HGvSPTv+R2FSK05h2icWqe7yuCnBYnZpKu
131
+ tcCSrs3EgrzIoZCv+5PpROaneDBsnQdza7aAjtrKmqcG5cfs/V63U5h9yvSL
132
+ nDmILjcdB2bYNZpnJibAQ+/qw8oQimKJxXSYgl0qq6DsqyU0RtmlwEiL9FJl
133
+ KtesszHKlqwGBBY6QqYfhCn1FEzvjIaOIK290OVMcCTtWuhM7A3MmDNhpuP+
134
+ ODKjBQPimc0h7P0pu4aBDTUAMGYgBj8hSj1ohcTXpfRUJLk91HoWZjTlLR+8
135
+ AGQUJ85ksQ630CkrgMccC1J3BW5rB1qF3EFljLXIVX03Ruqigti6wRuj4ppv
136
+ h1x6E5uAvX95ds4mcFVwolEWh9yCzmaZpMB53PQTgd1rPmYxj64cKG9Hp8f3
137
+ j1Q/SFxyDsVgmTy5cvg1Nj0UyGAameHMsdYheFCF/Ub4/eyYq3Bt3A7XahzC
138
+ nlhkBdefm0hjtAost6OBsxkZyOw8WLA/sG+CYIq9fmKG+HFq+gFowrGY0lWj
139
+ e9/GCTgHMNwaB0GC5ZnhOhTrwyeDbrsC9o3QXB1oBV47T3qdvcoYa7ex6rsx
140
+ UhcV1FYGb4yyBd8Oea/PT94+Yc/Pzp4IK+XNGbhu4F462LBR4LFzCc57R1uc
141
+ YNDYDqJJ6xQeT9c1b9aLVmNQ3S4b4a1+pBW4qyGOtYjLO6fNlS6pgr7aOTRG
142
+ +cpvh0OTvS2IUJKsym2XzMwEPiE3uIaOacYLYRHjQwIYcraCVyFWi+aZGSeZ
143
+ JC9Zj/CCJ0EYL8gJnjvH6A6sucwXdS5u3Nm7o5RlNaJ0O+l7Q2PQ3RN+nZrt
144
+ FVAkQ5p7yuM6y3RjZeraWlk3xsqLJj1WOOHcjpv0OYbxySPh8YczAYEQf8rJ
145
+ Ak41H2oDe38+JtCPeTYhxpQ1bRc1bqW/YsCCSrpGg92mDus5bjCbY/4toWc7
146
+ fB2HNIxeBbYbbbPqKCu2mFEVs2u3WNYxlJx0JZUNVhm9MVKLvd3Oei8hxOBa
147
+ MhmAFSPWBOlGTHMPkYo4DPzYueLsYxDZp/gdF3SgsamcUT9+eP9sN2avAkRJ
148
+ RKyqmW07sdsy7ZlibLkrZJeiAooG4a2+WTSnQrYKGvxzVCLD1p87rcWP9vsf
149
+ rj54Uedlixzs3cHecNjr78JWN212PuPseUq1mM1RhJATlkLwkIp1Sa/eJsG5
150
+ gBCiOoFpt97i8r24NNZRXe8LefuKkVZQXrdC2WsJL++8McqvK6RXP4fGqLD0
151
+ 29Fg/iLiqJ7HI8sxXfbh+F6IqHezMF8HSfDx/ISIqG209f5QN/72RBR5ljXn
152
+ rfcIMcM2W2t1G0a/gtmNeNbyGCvoRq9qjGsJR3YLDiSWUCGYpVHJtybXeDsy
153
+ Sd8Shtbp61PJs2ZzDxF4MKzzU/Ar0W9qbzfZcSJNc/Kuw7EeXLNFMGcOYvWR
154
+ aSXSBpeuc3KDNpv3w7eef/yh/+blTT9+QyRn9HrDfq/f3vvbk5xvQn/y42Ac
155
+ kOPJ4hoyDNZxrX5Vy9+I7GrHWUF7VZ/DWtLLu4aZmC2nQoJ1M4ACmy76doSY
156
+ +XvIngiDax4hjgM1yYEbR4sdmzOywwuCUmip5DaIQXvsNx4FGszzK+RD3A/B
157
+ RYvvJkf9K+OTHhDB7bX7g/6w0/vbE5w1hTY2DVrYfJCTF2aSROvoTd/7Qt2s
158
+ bpwV9FYl6bX0pnpujNRFhdJqhm6MCuu9Hal9JMo5EtA6gSPKZS6fJAxR/nuR
159
+ jS+++fBq+toKxoMu0U2300FUuTf8ErqRQAR3UAYKrkpBJ5uPTXjApTUmfd1C
160
+ VdzEPMlskxgcfQP7pKQZstPAtmDzyZFzMyUcPYuCOTluEZslKTFeFNzFpm16
161
+ cKZbjdERrtgZLoMrx5RBAWIDuWcZPHR6vfARi/qIT/Ydp9AsIkE2Yb4pNFGx
162
+ 0gI4MmuN3ImUwhHKOWoIsi4oUCDewDu5gSdSu7IHjL0MnRgEwtrNftNgjzzb
163
+ jGdP2fcm+Axck8LZfOKQ19912Ws8czzoQKpbdCziDshU8ygWKBJKPsXwEbvQ
164
+ 5q+4BqUeUSn6pBaHDehMhtbG/+G53t7vtPfbba09wGfmus9bsLQFwh18HM3J
165
+ D94ePmHUgnLgPE4ZOSrsIRYpcJKuOJRQtEyBGkSaVyxFOnMA6mzNsCg+IVpI
166
+ 6ExM95KZY4LslRMlFG7/DWF0ZMk4UeB75H1/wl7wq3chvgmbHgIzElXpPArY
167
+ GgP3fgEzRUw6PmzoMtqWSbpEvcLU0dt7rW57aHQ0LrGotbW+Zmjw1hP2NFqy
168
+ 5insabMUe00v7GTUT/NhFAWbU+aTzPujezntCBg/ejjs7XWfFiGea5JLe++e
169
+ JmpS4sPyTOnm0lT/8j//jf/1M12K5+XBP0XKWbRmHgl7tDFqE5GWO8uJrojB
170
+ a3hU85kU0A56mpIBXNh0xffI5KOIZU2z8mZXJJL19sgfxyEwUojb0yYo/Cz8
171
+ yC7VRfWRZC6500WxktK0i8kZVZ5b9P4I+kQYNWnp3T0YSv12vzuEFyhEnFpu
172
+ laLac/L9c7k/j1x+Aw+59dy8zshLsTDuZcghrnIrpqNr4DtGWzAdfZ8Y0DLT
173
+ US0E06EWjdELbnFvDO+90RZMR8+YDvdSvkrwXkpYyR9tDKDOoN/p6Ybe1Smy
174
+ m7pCBD+RNsaP82jM4OKwYYxsHSzt/nm7t7/X2e8OVoCl3WdpiwJY2v17BouB
175
+ /Mc2MtS6PaIbRKkkiSAJwWdn1iyAaebaAkrPIgf3/nmOHbJl8Ohau0Pg6RhC
176
+ VPWXRBWohloQeESLxug75OwJqml37hk8ekfvI00U/xqjb6Lg2pUEAy2CxcE8
177
+ QhwRJgPJtaMwhN5BKeRCBD6fRfDZIUQeL/B0y/BqAyDn2FwG7a9aeFELlrZo
178
+ jN5ZSUDg0vX7BRc8NZ09fdA1+o2R9FE+C+CvjCDqEjAk0pEj3/ktysW4hNap
179
+ CY54DIiJ3IxM78Hevhs/ag80fUiQaq+iLNGCICVaQH+cQ0QnDIG5e+VGg+6g
180
+ rxvtvtGBu+X7E7m/XpydSHB8NBc+Z2d8ir22dYC0u+f63r6uYy/VkQ4A0u6y
181
+ tEUGkHb3ngGit40OWLQOHDwnTZm8SEdwNiKGL4HyMnIuoSRD7eFurL0jciau
182
+ dGLesHdTm28bUNDwegSolXusTS0IUKJFY/RmjlQDo3e/YOrv9btGt93vgW5O
183
+ g+sn7L3pIH+r09TZEVVIsFMnFFlqCKkiRTXAVzBBaEhmuixvuzNhK/EQ9R+x
184
+ jN9vcev1NR1MfbCvd1YwKQBQB1NXLRQA9Xtm6L1+r93t7vX1LujsxXcINwt2
185
+ fT5Dbm7MXsDrg0gLyOr9ArLuWye69MxtU1ZPM3ShI3VXAYZaCB2JWhBgwA2M
186
+ e2bdPb096PWHg/ZeY3QMsyuFzKuI27TzXgcuTOzo//7X2zJH6sFUFXTS3d+r
187
+ 50jUQtAJtVDgaBsbbbQlP4fUx1WKpEqEbBYTIUW2pFSFpbFbUvvLloIoKrtJ
188
+ NKofEKZcvZGQmqiUlzlBUgj4Vp7vL0wUKvKQTxoqW47GxntFA2Yehjy6UM2U
189
+ iCw+ry35SJdSLNo4mHXFiGPXtC6TANVrFw4UfZkBjEC+fwntd9YdHdRUJ+Ae
190
+ Sj8eUO1HtZt1feRQLQSc5WIukPNeMNrgRemqyYhp5NgoJQrXFMIJDwIEFn0R
191
+ jZaNBkoqWJt+vjpbnLwen3l5k9z1z3YiUGhhCc/lBXtkeuFTdgbUB9GXLAnp
192
+ R44vrO9zdbUJXBDyhcsFShv0MV6suMsQ/DJ7vGF/lOg6L1XvUWkCMtvhqcOD
193
+ Si+Zj1dsxY1I5hxZQ5Jw11AMVU7ypDmFu6qJYiIsCFc83mdHvukufoPujgRB
194
+ QG2C1MHKpGqoAP0h0xBlI5HNrUuRiQIjStyAw8q63EfVBY8BTeHmqK6zpktZ
195
+ 3RljklmJJ8E9TvYhyqEXxb4DTpBs1Bem55lWLQpJvYIHYMNFesEYany5o+M4
196
+ RjRP6GnO6SzwhZHzuT2HGX0KxkuU8CYYs2eBGQkXbKmHL6CDlzfA3yaEAO90
197
+ WAuaMzyozKQGU1hLALvK5jflbsifTQYXHlS6+YIFvQisOflABQl9jiViTvAh
198
+ 16/rNR5UJlS/LhtpJy4KmqPyyuCDlbcpeXbDrhQZF0qTVD3gxDWvYC7bmmdG
199
+ l3Zw7dN2FLXX7JV6BAqVjzYcS9Rb1eL0lJ5Ueilio6QaiKobKaVL94tCuFpZ
200
+ gTS3soj+rHwW3A2KQVpQmakDLp+arpDLFTFclMGp9iCaF9pmEQmqRBI8VPz7
201
+ nBSjZbfA+T24I8/piwUTmKEiX7oCvE37gxi5Mi2oRyhAo4sv6ohb8wjxWHBX
202
+ dbXcS2ljHYSjR1YQLp6KMIYMn6Re93ZTH/T7XUqUQnrphOt6MyrSTEqEyhnN
203
+ jn2ryY4QlomobCCmpCsAhNuFUJFwBkul8qHABANPbMlL4LCgUxYJQuRuBXn0
204
+ COhZ8vBfX183kQhxKeLgpTRHKn8t6kyF4tsXqGaHTof4CSGOKluzyktkKt61
205
+ ylWqbK1sVhc0E1EpemU0YErZyQzVqINSqSuhKiNCGK3IxbIpfEdes88uGcXR
206
+ 2YqyTuTKxH4mY+1gHLVG67uyELezpXx+TpdZV89xSsQ8gY4kOgO5v08BDosH
207
+ NRh4ICMzEZ+W4jJAVxoEqGcdpbtF1iAZh9qRRdopWAdifqpui5T1S74Yk4S8
208
+ gNSKEmuexBeIF9HJFjIciLLSxAxNMhFQ8UleyAhJdyakNnaNBislMoF8OtTg
209
+ sGE7MYWe9n3IbEFEiFuOvlUDMEg/OQC2jYd4Z64zP8xGQ/Ax5lzDUy2dmJZN
210
+ rDH6Cg8p6/druUlVN2ncsMwUZS1agrIfTpVoiqirbdjEiYqZwKj8EQokSA46
211
+ UTpnWRGkQGtntVnZJD0c2AGUloNEyQiUZCc5jR7YNiIE1jxGdhKGkHV5aKKO
212
+ IhGYz/jqwebD/LE6DHzqKDZC3FSUEpPoRuzNFBWrdcOVaErar00BmJymakDn
213
+ OTZOx0jxv1vEvzpChWigA7PDQxgT6VkxjA0qrbo1ID9VV3gC8xggpMpQOuuA
214
+ BH392jCHzUGJyP4yxpYGmkPXqQPhrYaxyAE9CqKDFr5YUPolbKTqNN5RaIDy
215
+ JB0A8c4TgLRcXicqWchBhVp0ip07EEkWqlx8qnUiZRzumbpRcTcTUuCDtO02
216
+ oBwwl+TzdHOKMvlCct/fqYdUI6Btc3pxyJbaGHFxk/CSbvkazMZNZFoiDQKe
217
+ YaV8CK4g+X/d/i8yjQjnbGS+LMECiDdIay+ruKQFFo8bqWfjda2WGPnG21GC
218
+ dInB0E0bRb1rWQwaFfj1LQdcYjS1A5ZZzZ2Gg8VY3O9yuHMcHoMc2oyJFgnl
219
+ TqN9lkDl+IKlVUi0PHINFRb4SypIlkmGKdFUlL0bSW2aWDKCDVqFFtmMBYED
220
+ rz6pQluD2YfNxpz7Wx2VmENBQ5F4gSaLXJ3tk8WS0JGDvedeeZsJ+4kyaVeD
221
+ to4oJMaXXeDZuUXESJgQO19IE3Bp1UAKyIAm9xkapoXegvrcNSO55pjT80xl
222
+ vNM+depGegZDRWgA/jggtrGdoeZrhxIuv62N1aobSyreAlPQv9VRGKsWhw7S
223
+ 41SyS3VBds12hB6igvFMWF9/F3918naL4u+vKpDug2KQTizOH/iGyvtzI6pO
224
+ WUpVtNpsRqmh301XEo6L1EEwNm0cgQKn0V/+7d9T11bRtsFkq2wTWQMBVHtK
225
+ at/apl81q/+sm1Uti1WzEv64e5/Wf9RNq1YpVNPa4m5YBar/qptTrWas5rRs
226
+ dN9JGBHMK6qIUlGRPeFInxO50qi4GbIcknCNzJC7sBSS30RxvIuKgPR6VGms
227
+ gm7t9pCvrNsk8HcJj+Y1Er23umHWz7Z228hX1m2e4my3u5HWT7d2O8lX1m2q
228
+ 4nS3uMHWz7V2m8lX1m224lzXb7w60s8se2F4lz3M4qijL9RpRHd3M9nX+A9S
229
+ L8GZTIkVgWrU61zjmER4MnNX4i017FpWI8qtoM3HYrtNqBx+4vhILN2aLKjd
230
+ OG/mHry0AbyjFOlepZDecoFkO1V46Rky0nDsFM6k8q1ZKzFLvuA7se5aw+42
231
+ /sTy6Bl5VilVPZLuIyI7CmKkgV6ceuGGtwpdpFGKNB6M9GoYdIhV4FAl0Jcs
232
+ SCqRNz0Xh0NrIk3LJ3sy3T7K2MSRsvXhhlwjA+UihdvDUOeIoZSc4uHotTiX
233
+ F6JNlYwBCsifH+08RDYhnPkikZA9cpOnM/3RNHmKMqfpzsPqQ6Pw8CH+Vd7u
234
+ pQ0wEvUvnCkIcdKU5J4szEFO4bGYAWWnIUACwTSG7YsqK8d6vHMhHom7phsH
235
+ +aOLncc1r40D136Mt2peo0cXFzs7j39AOd7jx/BCP35Mnu8xdgjtTe8xzj/J
236
+ ZlwkljIfKoH7LYIPWE4h+hCOPiCgBUhz2F9LoH6MEnJkiOs76sIAcNJLs3CN
237
+ 49xLoHu33J8Em04l6aI/Q10ZOx111SG4q7471Hf2o9j5hqs8cWILpX6I4AWU
238
+ /VRa7bE48Liy1Ac/pRl6iLj+/FVLnYucxl+/3pGEus8e/HTkSnr9+at55H5d
239
+ RAKRjcrwSyEpV66O+sizNHAQPg6GR1IsTul4sKPG/vmrSjvE+ARk0TEOWrUu
240
+ f50jw7FCj0cx+9b0FzinhOr/Y9Ox93d2Rvi1i0glDqygIJjg5nM6JxaVFXhI
241
+ v9E1ZU/RXoJkwcFByIuqJapcUhZoa9mTLWNcEE0vb5BYh9OkqXYjTzGRWFgS
242
+ kjlxhqOzBbJwbvLjP2nWInt4OYp9m5yX8l8BqKa+oFzQjJC0dti4QMamj9Mu
243
+ v3l1IgOrS5vhl19+yQ9S35nMfRlxm0CMLFDxFSVfoaOv2Z9Au84kv2bsH5oT
244
+ JBjAi/TVnwC9/d2HCETv/v412v2+8/sOur3lNg5H71DhQTIZqEOtLk4hJ4Wg
245
+ i1P5ME6VtF/TicrELtkpimUQJBTNuUTSDkGYSkPjKvRBRMQLbT6hzNp9yRmx
246
+ OIaT5xgKmNM7DC5gOrwXqX0UZrkVTwpHxz7JfDkpynaj4B5WVl3HMUVx8Yci
247
+ qDwZIek56qaomNL0d34hMWDadkSs/BeGaCj1II+SptA9JAnlk6TcPadmqQBi
248
+ YepWKlfFUoukauJEWE2cOq3h/NQYLKRIuSVbR/w9ibSql50FOPUH0wYp05Su
249
+ kfqdnn5Jxe3qzI4mO8WJ81hMgvpgc4q00mZO9TnV7T/NxHphQgg4e3DoIZND
250
+ XpSot6gpiFXlH3LW0Bx2RcpNdKEOPOIXVJe8SxXPpiZKnncpqwd/LkHdmyFz
251
+ 43CXcnt2C9XLBziTz17QoVzy74v8Pw81z153ZAAA
252
+ http_version: '1.1'
253
+ recorded_at: Tue, 14 Feb 2012 08:26:10 GMT
254
+ recorded_with: VCR 2.0.0.rc1
data/lib/wombat/parser.rb CHANGED
@@ -20,7 +20,8 @@ module Wombat
20
20
  self.context = n
21
21
  it.all_properties.each do |p|
22
22
  p.result ||= []
23
- p.result << locate_first(p)
23
+ result = locate_first(p)
24
+ p.result << result if result
24
25
  end
25
26
  end
26
27
  end
@@ -33,4 +33,31 @@ describe 'basic crawler setup' do
33
33
  results["social"]["twitter"].should == "Verão"
34
34
  end
35
35
  end
36
+
37
+ it 'should iterate elements' do
38
+ VCR.use_cassette('for_each_page') do
39
+ crawler = Class.new
40
+ crawler.send(:include, Wombat::Crawler)
41
+
42
+ crawler.base_url "https://www.github.com"
43
+ crawler.list_page "/explore"
44
+
45
+ crawler.for_each "css=ol.ranked-repositories li" do
46
+ repo 'css=h3'
47
+ description 'css=p.description'
48
+ end
49
+
50
+ crawler_instance = crawler.new
51
+ results = crawler_instance.crawl
52
+
53
+ results["repo"].should =~ ["jairajs89 / Touchy.js", "mcavage / node-restify", "notlion / streetview-stereographic", "twitter / bootstrap", "stolksdorf / Parallaxjs"]
54
+ results["description"].should =~ [
55
+ "node.js REST framework specifically meant for web service APIs",
56
+ "A simple light-weight JavaScript library for dealing with touch events",
57
+ "Shader Toy + Google Map + Panoramic Explorer",
58
+ "HTML, CSS, and JS toolkit from Twitter",
59
+ "a Library for Javascript that allows easy page parallaxing"
60
+ ]
61
+ end
62
+ end
36
63
  end
data/spec/parser_spec.rb CHANGED
@@ -97,8 +97,8 @@ describe Wombat::Parser do
97
97
  @metadata.should_receive(:iterators).and_return [it]
98
98
  @metadata.should_receive(:flatten)
99
99
  fake_document.should_receive(:parser).and_return(fake_parser)
100
- it['prop_1'].should_receive(:result).exactly(4).times.and_return([])
101
- it['prop_2'].should_receive(:result).exactly(4).times.and_return([])
100
+ it['prop_1'].should_receive(:result).exactly(2).times.and_return([])
101
+ it['prop_2'].should_receive(:result).exactly(2).times.and_return([])
102
102
  @parser.mechanize.stub(:get).and_return fake_document
103
103
  @parser.should_receive(:select_nodes).with("it_selector").and_return [c1, c2]
104
104
  @parser.should_receive(:context=).with(c1).ordered
@@ -110,4 +110,30 @@ describe Wombat::Parser do
110
110
 
111
111
  @parser.parse(@metadata)
112
112
  end
113
+
114
+ it 'should not include null results in iterated block' do
115
+ fake_parser = double :parser
116
+ fake_document = double :document
117
+ c1 = double :context
118
+ c2 = double :context
119
+ it = Wombat::Iterator.new "it_selector"
120
+ it.prop_1 "some_selector"
121
+
122
+ @parser.should_receive(:context=).ordered
123
+ @metadata.should_receive(:iterators).and_return [it]
124
+ @metadata.should_receive(:flatten)
125
+ fake_document.should_receive(:parser).and_return(fake_parser)
126
+ @parser.mechanize.stub(:get).and_return fake_document
127
+ @parser.should_receive(:select_nodes).with("it_selector").and_return [c1, c2]
128
+ @parser.should_receive(:context=).with(c1).ordered
129
+ @parser.should_receive(:context=).with(c2).ordered
130
+ @parser.should_receive(:context=).ordered
131
+ @parser.should_receive(:locate_first).with(it['prop_1']).and_return(12)
132
+ @parser.should_receive(:locate_first).with(it['prop_1']).and_return(nil)
133
+ @parser.stub(:locate)
134
+
135
+ @parser.parse(@metadata)
136
+
137
+ it["prop_1"].result.should == [12]
138
+ end
113
139
  end
data/wombat.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "wombat"
8
- s.version = "0.2.2"
8
+ s.version = "0.2.3"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Felipe Lima"]
@@ -28,6 +28,7 @@ Gem::Specification.new do |s|
28
28
  "Rakefile",
29
29
  "VERSION",
30
30
  "fixtures/vcr_cassettes/basic_crawler_page.yml",
31
+ "fixtures/vcr_cassettes/for_each_page.yml",
31
32
  "lib/wombat.rb",
32
33
  "lib/wombat/crawler.rb",
33
34
  "lib/wombat/iterator.rb",
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wombat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-02-14 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70358424414520 !ruby/object:Gem::Requirement
16
+ requirement: &70328357893040 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70358424414520
24
+ version_requirements: *70328357893040
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: activesupport
27
- requirement: &70358424414040 !ruby/object:Gem::Requirement
27
+ requirement: &70328357892320 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: '0'
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70358424414040
35
+ version_requirements: *70328357892320
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: bundler
38
- requirement: &70358424413560 !ruby/object:Gem::Requirement
38
+ requirement: &70328357891720 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ! '>='
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: '0'
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70358424413560
46
+ version_requirements: *70328357891720
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
- requirement: &70358424413080 !ruby/object:Gem::Requirement
49
+ requirement: &70328357907480 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ! '>='
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: '0'
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70358424413080
57
+ version_requirements: *70328357907480
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: yard
60
- requirement: &70358424412600 !ruby/object:Gem::Requirement
60
+ requirement: &70328357906820 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ! '>='
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: '0'
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70358424412600
68
+ version_requirements: *70328357906820
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: jeweler
71
- requirement: &70358424412120 !ruby/object:Gem::Requirement
71
+ requirement: &70328357906240 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ! '>='
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: '0'
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *70358424412120
79
+ version_requirements: *70328357906240
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: rspec
82
- requirement: &70358424411640 !ruby/object:Gem::Requirement
82
+ requirement: &70328357905660 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ! '>='
@@ -87,10 +87,10 @@ dependencies:
87
87
  version: '0'
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *70358424411640
90
+ version_requirements: *70328357905660
91
91
  - !ruby/object:Gem::Dependency
92
92
  name: vcr
93
- requirement: &70358424411160 !ruby/object:Gem::Requirement
93
+ requirement: &70328357905040 !ruby/object:Gem::Requirement
94
94
  none: false
95
95
  requirements:
96
96
  - - =
@@ -98,10 +98,10 @@ dependencies:
98
98
  version: 2.0.0.rc1
99
99
  type: :development
100
100
  prerelease: false
101
- version_requirements: *70358424411160
101
+ version_requirements: *70328357905040
102
102
  - !ruby/object:Gem::Dependency
103
103
  name: fakeweb
104
- requirement: &70358424427060 !ruby/object:Gem::Requirement
104
+ requirement: &70328357904360 !ruby/object:Gem::Requirement
105
105
  none: false
106
106
  requirements:
107
107
  - - ! '>='
@@ -109,7 +109,7 @@ dependencies:
109
109
  version: '0'
110
110
  type: :development
111
111
  prerelease: false
112
- version_requirements: *70358424427060
112
+ version_requirements: *70328357904360
113
113
  description: Generic Web crawler with a DSL that parses structured data from web pages
114
114
  email: felipe.lima@gmail.com
115
115
  executables: []
@@ -129,6 +129,7 @@ files:
129
129
  - Rakefile
130
130
  - VERSION
131
131
  - fixtures/vcr_cassettes/basic_crawler_page.yml
132
+ - fixtures/vcr_cassettes/for_each_page.yml
132
133
  - lib/wombat.rb
133
134
  - lib/wombat/crawler.rb
134
135
  - lib/wombat/iterator.rb