rubyscraper 0.3.0 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,71 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Owsiany
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-26 00:00:00.000000000 Z
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '2.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '2.4'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poltergeist
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '1.6'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '1.6'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rest-client
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: slop
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: '1.8'
62
48
  type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: '1.8'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: bundler
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -112,16 +98,16 @@ dependencies:
112
98
  name: pry
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - ">="
101
+ - - "~>"
116
102
  - !ruby/object:Gem::Version
117
- version: '0'
103
+ version: '0.10'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - ">="
108
+ - - "~>"
123
109
  - !ruby/object:Gem::Version
124
- version: '0'
110
+ version: '0.10'
125
111
  description: Scrapes job sites for job details and sends post request to server.
126
112
  email: nowsiany@gmail.com
127
113
  executables:
@@ -136,13 +122,21 @@ files:
136
122
  - bin/console
137
123
  - bin/rubyscraper
138
124
  - bin/setup
139
- - lib/assets/scrapes.json
140
125
  - lib/rubyscraper.rb
126
+ - lib/rubyscraper/api_dispatcher.rb
141
127
  - lib/rubyscraper/binary.rb
128
+ - lib/rubyscraper/option_parser.rb
129
+ - lib/rubyscraper/paginator.rb
130
+ - lib/rubyscraper/processor.rb
131
+ - lib/rubyscraper/sub_page_scraper.rb
132
+ - lib/rubyscraper/summary_scraper.rb
142
133
  - lib/rubyscraper/version.rb
143
134
  - rubyscraper.gemspec
135
+ - spec/paginator_spec.rb
144
136
  - spec/rubyscraper_spec.rb
145
137
  - spec/spec_helper.rb
138
+ - spec/sub_page_scraper_spec.rb
139
+ - spec/summary_scraper_spec.rb
146
140
  homepage: https://github.com/ndwhtlssthr/rubyscraper
147
141
  licenses:
148
142
  - MIT
@@ -1,287 +0,0 @@
1
- [
2
- {
3
- "name":"stackoverflow",
4
- "base_url":"http://www.careers.stackoverflow.com",
5
- "summary":{
6
- "url":"/jobs?searchTerm=SEARCHTERM&sort=p",
7
- "pagination_fmt":"&pg=",
8
- "pagination_start":"1",
9
- "pagination_scale":"1",
10
- "params":[
11
- {
12
- "SEARCHTERM":[
13
- "ruby",
14
- "ruby+on+rails",
15
- "javascript"
16
- ]
17
- }
18
- ],
19
- "loop":".listResults .-item",
20
- "fields":[
21
- {
22
- "field":"position",
23
- "method":"find",
24
- "path":"h3.-title a"
25
- },
26
- {
27
- "field":"url",
28
- "method":"find",
29
- "path":"h3.-title a",
30
- "attr":"href"
31
- },
32
- {
33
- "field":"posting_date",
34
- "method":"first",
35
- "path":"p._muted"
36
- }
37
- ]
38
- },
39
- "sub_page":{
40
- "fields":[
41
- {
42
- "field":"company",
43
- "method":"find",
44
- "path":"a.employer"
45
- },
46
- {
47
- "field":"location",
48
- "method":"find",
49
- "path":"span.location"
50
- },
51
- {
52
- "field":"description",
53
- "method":"all",
54
- "path":"div.description p",
55
- "loop_collect":"text",
56
- "join":"\n"
57
- },
58
- {
59
- "field":"tags",
60
- "method":"all",
61
- "path":"div.tags a.post-tag",
62
- "loop_collect":"text",
63
- "join":", "
64
- }
65
- ]
66
- }
67
- },
68
- {
69
- "name":"rubynow",
70
- "base_url":"http://jobs.rubynow.com/",
71
- "summary":{
72
- "url":"",
73
- "no_pagination?":"true",
74
- "pagination_fmt":"",
75
- "pagination_start":"",
76
- "pagination_scale":"",
77
- "params":[
78
- ],
79
- "loop":"ul.jobs li",
80
- "fields":[
81
- {
82
- "field":"position",
83
- "method":"find",
84
- "path":"h3 a"
85
- },
86
- {
87
- "field":"url",
88
- "method":"find",
89
- "path":"h3 a",
90
- "attr":"href"
91
- },
92
- {
93
- "field":"posting_date",
94
- "method":"find",
95
- "path":"span.date"
96
- }
97
- ]
98
- },
99
- "sub_page":{
100
- "fields":[
101
- {
102
- "field":"description",
103
- "method":"all",
104
- "path":"div#info p",
105
- "loop_collect":"text",
106
- "join":"\n"
107
- },
108
- {
109
- "field":"company",
110
- "method":"find",
111
- "path":"h2#headline a"
112
- },
113
- {
114
- "field":"location",
115
- "method":"find",
116
- "path":"h3#location"
117
- }
118
- ]
119
- }
120
- },
121
- {
122
- "name":"weworkremotely",
123
- "base_url":"https://weworkremotely.com",
124
- "summary":{
125
- "url":"/categories/2/jobs",
126
- "no_pagination?":"true",
127
- "pagination_fmt":"",
128
- "pagination_start":"",
129
- "pagination_scale":"",
130
- "params":[
131
- ],
132
- "loop":"section.jobs ul li",
133
- "fields":[
134
- {
135
- "field":"position",
136
- "method":"find",
137
- "path":"span.title"
138
- },
139
- {
140
- "field":"company",
141
- "method":"find",
142
- "path":"span.company"
143
- },
144
- {
145
- "field":"url",
146
- "method":"find",
147
- "path":"a",
148
- "attr":"href"
149
- },
150
- {
151
- "field":"posting_date",
152
- "method":"find",
153
- "path":"span.date"
154
- }
155
- ]
156
- },
157
- "sub_page":{
158
- "fields":[
159
- {
160
- "field":"location",
161
- "method":"find",
162
- "path":"span.location"
163
- },
164
- {
165
- "field":"description",
166
- "method":"all",
167
- "path":"div.listing-container div",
168
- "loop_collect":"text",
169
- "join":"\n"
170
- }
171
- ]
172
- }
173
- },
174
- {
175
- "name":"indeed",
176
- "skip":"true",
177
- "base_url":"http://www.indeed.com",
178
- "summary":{
179
- "url":"/jobs?q=SEARCHTERM&sr=directhire",
180
- "pagination_fmt":"&start=",
181
- "pagination_start":"0",
182
- "pagination_scale":"10",
183
- "params":[
184
- {
185
- "SEARCHTERM":[
186
- "ruby",
187
- "ruby+on+rails",
188
- "junior+web+developer",
189
- "ember.js",
190
- "full+stack"
191
- ]
192
- }
193
- ],
194
- "loop":"div.row.result",
195
- "fields":[
196
- {
197
- "field":"position",
198
- "method":"find",
199
- "path":"h2.jobtitle a"
200
- },
201
- {
202
- "field":"url",
203
- "method":"find",
204
- "path":"h2.jobtitle a",
205
- "attr":"href"
206
- },
207
- {
208
- "field":"company",
209
- "method":"find",
210
- "path":"span.company span"
211
- },
212
- {
213
- "field":"location",
214
- "method":"find",
215
- "path":"span.location span"
216
- },
217
- {
218
- "field":"description",
219
- "method":"find",
220
- "path":"span.summary span"
221
- },
222
- {
223
- "field":"posting_date",
224
- "method":"find",
225
- "path":"span.date"
226
- }
227
- ]
228
- },
229
- "sub_page":{
230
- "fields":[
231
- ]
232
- }
233
- },
234
- {
235
- "name":"linkedin",
236
- "skip":"true",
237
- "base_url":"https://www.linkedin.com",
238
- "summary":{
239
- "url":"/vsearch/j?keywords=SEARCHTERM&openAdvancedForm=true&locationType=I&countryCode=us&rsid=754744171429892349899&orig=FCTD&openFacets=L,C,TP&f_TP=1&pt=jobs&pt=jobs",
240
- "pagination_fmt":"&page_num=",
241
- "pagination_start":"1",
242
- "pagination_scale":"1",
243
- "params":[
244
- {
245
- "SEARCHTERM":[
246
- "Ruby",
247
- "Ruby+On+Rails",
248
- "javascript"
249
- ]
250
- }
251
- ],
252
- "loop":"ol.search-results li.result",
253
- "fields":[
254
- {
255
- "field":"position",
256
- "method":"find",
257
- "path":"a.title"
258
- },
259
- {
260
- "field":"url",
261
- "method":"find",
262
- "path":"a.title",
263
- "attr":"href"
264
- },
265
- {
266
- "field":"company",
267
- "method":"find",
268
- "path":"div.description a"
269
- },
270
- {
271
- "field":"location",
272
- "method":"find",
273
- "path":"dl.demographic bdi"
274
- }
275
- ]
276
- },
277
- "sub_page":{
278
- "fields":[
279
- {
280
- "field":"description",
281
- "method":"find",
282
- "path":"div.description-section div.rich-text"
283
- }
284
- ]
285
- }
286
- }
287
- ]