rubyscraper 0.3.0 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,71 +1,57 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rubyscraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nathan Owsiany
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-26 00:00:00.000000000 Z
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: capybara
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '2.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '2.4'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poltergeist
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ">="
31
+ - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '0'
33
+ version: '1.6'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ">="
38
+ - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '0'
40
+ version: '1.6'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rest-client
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
46
- - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :runtime
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - ">="
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- - !ruby/object:Gem::Dependency
56
- name: slop
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - ">="
45
+ - - "~>"
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
47
+ version: '1.8'
62
48
  type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - ">="
52
+ - - "~>"
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: '1.8'
69
55
  - !ruby/object:Gem::Dependency
70
56
  name: bundler
71
57
  requirement: !ruby/object:Gem::Requirement
@@ -112,16 +98,16 @@ dependencies:
112
98
  name: pry
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - ">="
101
+ - - "~>"
116
102
  - !ruby/object:Gem::Version
117
- version: '0'
103
+ version: '0.10'
118
104
  type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - ">="
108
+ - - "~>"
123
109
  - !ruby/object:Gem::Version
124
- version: '0'
110
+ version: '0.10'
125
111
  description: Scrapes job sites for job details and sends post request to server.
126
112
  email: nowsiany@gmail.com
127
113
  executables:
@@ -136,13 +122,21 @@ files:
136
122
  - bin/console
137
123
  - bin/rubyscraper
138
124
  - bin/setup
139
- - lib/assets/scrapes.json
140
125
  - lib/rubyscraper.rb
126
+ - lib/rubyscraper/api_dispatcher.rb
141
127
  - lib/rubyscraper/binary.rb
128
+ - lib/rubyscraper/option_parser.rb
129
+ - lib/rubyscraper/paginator.rb
130
+ - lib/rubyscraper/processor.rb
131
+ - lib/rubyscraper/sub_page_scraper.rb
132
+ - lib/rubyscraper/summary_scraper.rb
142
133
  - lib/rubyscraper/version.rb
143
134
  - rubyscraper.gemspec
135
+ - spec/paginator_spec.rb
144
136
  - spec/rubyscraper_spec.rb
145
137
  - spec/spec_helper.rb
138
+ - spec/sub_page_scraper_spec.rb
139
+ - spec/summary_scraper_spec.rb
146
140
  homepage: https://github.com/ndwhtlssthr/rubyscraper
147
141
  licenses:
148
142
  - MIT
@@ -1,287 +0,0 @@
1
- [
2
- {
3
- "name":"stackoverflow",
4
- "base_url":"http://www.careers.stackoverflow.com",
5
- "summary":{
6
- "url":"/jobs?searchTerm=SEARCHTERM&sort=p",
7
- "pagination_fmt":"&pg=",
8
- "pagination_start":"1",
9
- "pagination_scale":"1",
10
- "params":[
11
- {
12
- "SEARCHTERM":[
13
- "ruby",
14
- "ruby+on+rails",
15
- "javascript"
16
- ]
17
- }
18
- ],
19
- "loop":".listResults .-item",
20
- "fields":[
21
- {
22
- "field":"position",
23
- "method":"find",
24
- "path":"h3.-title a"
25
- },
26
- {
27
- "field":"url",
28
- "method":"find",
29
- "path":"h3.-title a",
30
- "attr":"href"
31
- },
32
- {
33
- "field":"posting_date",
34
- "method":"first",
35
- "path":"p._muted"
36
- }
37
- ]
38
- },
39
- "sub_page":{
40
- "fields":[
41
- {
42
- "field":"company",
43
- "method":"find",
44
- "path":"a.employer"
45
- },
46
- {
47
- "field":"location",
48
- "method":"find",
49
- "path":"span.location"
50
- },
51
- {
52
- "field":"description",
53
- "method":"all",
54
- "path":"div.description p",
55
- "loop_collect":"text",
56
- "join":"\n"
57
- },
58
- {
59
- "field":"tags",
60
- "method":"all",
61
- "path":"div.tags a.post-tag",
62
- "loop_collect":"text",
63
- "join":", "
64
- }
65
- ]
66
- }
67
- },
68
- {
69
- "name":"rubynow",
70
- "base_url":"http://jobs.rubynow.com/",
71
- "summary":{
72
- "url":"",
73
- "no_pagination?":"true",
74
- "pagination_fmt":"",
75
- "pagination_start":"",
76
- "pagination_scale":"",
77
- "params":[
78
- ],
79
- "loop":"ul.jobs li",
80
- "fields":[
81
- {
82
- "field":"position",
83
- "method":"find",
84
- "path":"h3 a"
85
- },
86
- {
87
- "field":"url",
88
- "method":"find",
89
- "path":"h3 a",
90
- "attr":"href"
91
- },
92
- {
93
- "field":"posting_date",
94
- "method":"find",
95
- "path":"span.date"
96
- }
97
- ]
98
- },
99
- "sub_page":{
100
- "fields":[
101
- {
102
- "field":"description",
103
- "method":"all",
104
- "path":"div#info p",
105
- "loop_collect":"text",
106
- "join":"\n"
107
- },
108
- {
109
- "field":"company",
110
- "method":"find",
111
- "path":"h2#headline a"
112
- },
113
- {
114
- "field":"location",
115
- "method":"find",
116
- "path":"h3#location"
117
- }
118
- ]
119
- }
120
- },
121
- {
122
- "name":"weworkremotely",
123
- "base_url":"https://weworkremotely.com",
124
- "summary":{
125
- "url":"/categories/2/jobs",
126
- "no_pagination?":"true",
127
- "pagination_fmt":"",
128
- "pagination_start":"",
129
- "pagination_scale":"",
130
- "params":[
131
- ],
132
- "loop":"section.jobs ul li",
133
- "fields":[
134
- {
135
- "field":"position",
136
- "method":"find",
137
- "path":"span.title"
138
- },
139
- {
140
- "field":"company",
141
- "method":"find",
142
- "path":"span.company"
143
- },
144
- {
145
- "field":"url",
146
- "method":"find",
147
- "path":"a",
148
- "attr":"href"
149
- },
150
- {
151
- "field":"posting_date",
152
- "method":"find",
153
- "path":"span.date"
154
- }
155
- ]
156
- },
157
- "sub_page":{
158
- "fields":[
159
- {
160
- "field":"location",
161
- "method":"find",
162
- "path":"span.location"
163
- },
164
- {
165
- "field":"description",
166
- "method":"all",
167
- "path":"div.listing-container div",
168
- "loop_collect":"text",
169
- "join":"\n"
170
- }
171
- ]
172
- }
173
- },
174
- {
175
- "name":"indeed",
176
- "skip":"true",
177
- "base_url":"http://www.indeed.com",
178
- "summary":{
179
- "url":"/jobs?q=SEARCHTERM&sr=directhire",
180
- "pagination_fmt":"&start=",
181
- "pagination_start":"0",
182
- "pagination_scale":"10",
183
- "params":[
184
- {
185
- "SEARCHTERM":[
186
- "ruby",
187
- "ruby+on+rails",
188
- "junior+web+developer",
189
- "ember.js",
190
- "full+stack"
191
- ]
192
- }
193
- ],
194
- "loop":"div.row.result",
195
- "fields":[
196
- {
197
- "field":"position",
198
- "method":"find",
199
- "path":"h2.jobtitle a"
200
- },
201
- {
202
- "field":"url",
203
- "method":"find",
204
- "path":"h2.jobtitle a",
205
- "attr":"href"
206
- },
207
- {
208
- "field":"company",
209
- "method":"find",
210
- "path":"span.company span"
211
- },
212
- {
213
- "field":"location",
214
- "method":"find",
215
- "path":"span.location span"
216
- },
217
- {
218
- "field":"description",
219
- "method":"find",
220
- "path":"span.summary span"
221
- },
222
- {
223
- "field":"posting_date",
224
- "method":"find",
225
- "path":"span.date"
226
- }
227
- ]
228
- },
229
- "sub_page":{
230
- "fields":[
231
- ]
232
- }
233
- },
234
- {
235
- "name":"linkedin",
236
- "skip":"true",
237
- "base_url":"https://www.linkedin.com",
238
- "summary":{
239
- "url":"/vsearch/j?keywords=SEARCHTERM&openAdvancedForm=true&locationType=I&countryCode=us&rsid=754744171429892349899&orig=FCTD&openFacets=L,C,TP&f_TP=1&pt=jobs&pt=jobs",
240
- "pagination_fmt":"&page_num=",
241
- "pagination_start":"1",
242
- "pagination_scale":"1",
243
- "params":[
244
- {
245
- "SEARCHTERM":[
246
- "Ruby",
247
- "Ruby+On+Rails",
248
- "javascript"
249
- ]
250
- }
251
- ],
252
- "loop":"ol.search-results li.result",
253
- "fields":[
254
- {
255
- "field":"position",
256
- "method":"find",
257
- "path":"a.title"
258
- },
259
- {
260
- "field":"url",
261
- "method":"find",
262
- "path":"a.title",
263
- "attr":"href"
264
- },
265
- {
266
- "field":"company",
267
- "method":"find",
268
- "path":"div.description a"
269
- },
270
- {
271
- "field":"location",
272
- "method":"find",
273
- "path":"dl.demographic bdi"
274
- }
275
- ]
276
- },
277
- "sub_page":{
278
- "fields":[
279
- {
280
- "field":"description",
281
- "method":"find",
282
- "path":"div.description-section div.rich-text"
283
- }
284
- ]
285
- }
286
- }
287
- ]