scrapinghub-client 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +1 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE +21 -0
  7. data/README.md +46 -0
  8. data/Rakefile +22 -0
  9. data/lib/scrapinghub-client.rb +3 -0
  10. data/lib/scrapinghub/jobs.rb +175 -0
  11. data/lib/scrapinghub/version.rb +3 -0
  12. data/scrapinghub.gemspec +26 -0
  13. data/spec/fixtures/vcr_cassettes/jobs/delete/bad_auth.yml +38 -0
  14. data/spec/fixtures/vcr_cassettes/jobs/delete/job/invalid.yml +38 -0
  15. data/spec/fixtures/vcr_cassettes/jobs/delete/job/multiple.yml +38 -0
  16. data/spec/fixtures/vcr_cassettes/jobs/delete/job/single.yml +38 -0
  17. data/spec/fixtures/vcr_cassettes/jobs/delete/project/invalid.yml +39 -0
  18. data/spec/fixtures/vcr_cassettes/jobs/delete/project/valid.yml +38 -0
  19. data/spec/fixtures/vcr_cassettes/jobs/list/bad_auth.yml +38 -0
  20. data/spec/fixtures/vcr_cassettes/jobs/list/count/3.yml +53 -0
  21. data/spec/fixtures/vcr_cassettes/jobs/list/has_tag/invalid.yml +38 -0
  22. data/spec/fixtures/vcr_cassettes/jobs/list/has_tag/multiple.yml +53 -0
  23. data/spec/fixtures/vcr_cassettes/jobs/list/has_tag/single.yml +48 -0
  24. data/spec/fixtures/vcr_cassettes/jobs/list/job/invalid.yml +38 -0
  25. data/spec/fixtures/vcr_cassettes/jobs/list/job/multiple.yml +48 -0
  26. data/spec/fixtures/vcr_cassettes/jobs/list/job/single.yml +43 -0
  27. data/spec/fixtures/vcr_cassettes/jobs/list/lacks_tag/invalid.yml +81 -0
  28. data/spec/fixtures/vcr_cassettes/jobs/list/lacks_tag/multiple.yml +67 -0
  29. data/spec/fixtures/vcr_cassettes/jobs/list/lacks_tag/single.yml +72 -0
  30. data/spec/fixtures/vcr_cassettes/jobs/list/project/invalid.yml +39 -0
  31. data/spec/fixtures/vcr_cassettes/jobs/list/project/valid.yml +81 -0
  32. data/spec/fixtures/vcr_cassettes/jobs/list/spider/invalid.yml +38 -0
  33. data/spec/fixtures/vcr_cassettes/jobs/list/spider/valid.yml +62 -0
  34. data/spec/fixtures/vcr_cassettes/jobs/list/state/finished.yml +81 -0
  35. data/spec/fixtures/vcr_cassettes/jobs/list/state/pending.yml +38 -0
  36. data/spec/fixtures/vcr_cassettes/jobs/schedule/bad_auth.yml +38 -0
  37. data/spec/fixtures/vcr_cassettes/jobs/schedule/project/invalid.yml +39 -0
  38. data/spec/fixtures/vcr_cassettes/jobs/schedule/spider/add_tag.yml +38 -0
  39. data/spec/fixtures/vcr_cassettes/jobs/schedule/spider/already-running.yml +39 -0
  40. data/spec/fixtures/vcr_cassettes/jobs/schedule/spider/extra.yml +38 -0
  41. data/spec/fixtures/vcr_cassettes/jobs/schedule/spider/minimal.yml +38 -0
  42. data/spec/fixtures/vcr_cassettes/jobs/schedule/spider/priority.yml +38 -0
  43. data/spec/fixtures/vcr_cassettes/jobs/stop/bad_auth.yml +38 -0
  44. data/spec/fixtures/vcr_cassettes/jobs/stop/job/already-stopped.yml +38 -0
  45. data/spec/fixtures/vcr_cassettes/jobs/stop/job/invalid.yml +38 -0
  46. data/spec/fixtures/vcr_cassettes/jobs/stop/job/valid.yml +38 -0
  47. data/spec/fixtures/vcr_cassettes/jobs/stop/project/invalid.yml +39 -0
  48. data/spec/fixtures/vcr_cassettes/jobs/update/bad_auth.yml +38 -0
  49. data/spec/fixtures/vcr_cassettes/jobs/update/has_tag.yml +38 -0
  50. data/spec/fixtures/vcr_cassettes/jobs/update/job.yml +38 -0
  51. data/spec/fixtures/vcr_cassettes/jobs/update/lacks_tag.yml +38 -0
  52. data/spec/fixtures/vcr_cassettes/jobs/update/no-query-filters.yml +38 -0
  53. data/spec/fixtures/vcr_cassettes/jobs/update/no-update-params.yml +38 -0
  54. data/spec/fixtures/vcr_cassettes/jobs/update/project/invalid.yml +39 -0
  55. data/spec/fixtures/vcr_cassettes/jobs/update/spider.yml +38 -0
  56. data/spec/fixtures/vcr_cassettes/jobs/update/state.yml +38 -0
  57. data/spec/integration/jobs_spec.rb +567 -0
  58. data/spec/spec_helper.rb +22 -0
  59. data/spec/unit/jobs_spec.rb +188 -0
  60. data/spec/unit/scrapinghub_spec.rb +8 -0
  61. metadata +200 -0
@@ -0,0 +1,81 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://XXX:@dash.scrapinghub.com/api/jobs/list.json?project=1&state=finished
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sat, 25 Jul 2015 17:59:13 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "count": 9, "total": 9, "jobs": [{"responses_received":
36
+ 5323, "items_scraped": 1030, "logs": 167, "tags": ["foo", "bar"], "version":
37
+ "1434570463", "spider": "atlantic_firearms_crawl", "updated_time": "2015-07-23T01:26:37",
38
+ "priority": 2, "state": "finished", "close_reason": "cancel_timeout", "spider_type":
39
+ "manual", "spider_args": {}, "started_time": "2015-07-22T23:02:13", "id":
40
+ "1/1/6", "errors_count": 4, "elapsed": 4088408}, {"responses_received":
41
+ 6243, "items_scraped": 1122, "version": "1434570463", "tags": [], "close_reason":
42
+ "cancel_timeout", "spider": "atlantic_firearms_crawl", "updated_time": "2015-07-22T22:35:04",
43
+ "priority": 2, "state": "finished", "spider_type": "manual", "spider_args":
44
+ {}, "started_time": "2015-07-22T20:03:31", "id": "1/1/4", "errors_count":
45
+ 3, "elapsed": 242372452, "logs": 179}, {"responses_received": 365486, "items_scraped":
46
+ 4037, "close_reason": "cancel_timeout", "logs": 33631, "tags": ["foo"], "version":
47
+ "1434570463", "spider": "firearms_for_sale_crawl", "updated_time": "2015-07-21T02:43:25",
48
+ "priority": 2, "state": "finished", "spider_args": {}, "started_time": "2015-06-27T18:36:06",
49
+ "id": "1/5/1", "errors_count": 0, "elapsed": 3860595, "spider_type": "manual"},
50
+ {"responses_received": 276710, "items_scraped": 63362, "close_reason": "finished",
51
+ "logs": 7356, "tags": [], "version": "1434570463", "spider": "palmetto_state_armory_crawl",
52
+ "priority": 2, "updated_time": "2015-07-02T20:49:25", "state": "finished",
53
+ "spider_type": "manual", "spider_args": {"": ""}, "started_time": "2015-06-27T18:35:17",
54
+ "id": "1/3/2", "errors_count": 0, "elapsed": 1976987487}, {"logs": 2189,
55
+ "spider": "cheaper_than_dirt_crawl", "spider_args": {}, "id": "1/2/1",
56
+ "priority": 2, "items_scraped": 81852, "state": "finished", "version": "1434570463",
57
+ "spider_type": "manual", "errors_count": 0, "responses_received": 86259, "tags":
58
+ [], "elapsed": 2901085220, "close_reason": "finished", "started_time": "2015-06-20T15:59:43",
59
+ "updated_time": "2015-06-22T04:07:48"}, {"responses_received": 1194, "items_scraped":
60
+ 1066, "version": "1434570463", "tags": ["bar"], "spider": "palmetto_state_armory_crawl",
61
+ "updated_time": "2015-06-21T20:07:12", "priority": 2, "state": "finished",
62
+ "close_reason": "cancel_timeout", "spider_args": {}, "errors_count": 0, "started_time":
63
+ "2015-06-21T19:02:41", "id": "1/3/1", "logs": 88, "elapsed": 4067005,
64
+ "spider_type": "manual"}, {"logs": 21, "spider": "atlantic_firearms_crawl",
65
+ "spider_args": {}, "id": "1/1/3", "priority": 2, "items_scraped": 25,
66
+ "state": "finished", "version": "1434570463", "spider_type": "manual", "errors_count":
67
+ 0, "responses_received": 114, "tags": [], "elapsed": 3273708778, "close_reason":
68
+ "cancel_timeout", "started_time": "2015-06-17T20:29:46", "updated_time": "2015-06-17T20:32:47"},
69
+ {"responses_received": 0, "items_scraped": 0, "close_reason": "no_reason",
70
+ "logs": 13, "tags": [], "started_time": "2015-06-17T20:27:58", "version":
71
+ "1434570463", "spider": "atlantic_firearms_crawl", "updated_time": "2015-06-17T20:28:06",
72
+ "priority": 2, "state": "finished", "spider_args": {}, "errors_count": 2,
73
+ "elapsed": 3274266396, "id": "1/1/2", "spider_type": "manual"}, {"responses_received":
74
+ 0, "items_scraped": 0, "logs": 13, "tags": [], "started_time": "2015-06-17T20:17:29",
75
+ "version": "1434570463", "spider": "atlantic_firearms_crawl", "updated_time":
76
+ "2015-06-17T20:17:40", "priority": 2, "state": "finished", "close_reason":
77
+ "no_reason", "spider_args": {}, "errors_count": 2, "elapsed": 3274893218,
78
+ "id": "1/1/1", "spider_type": "manual"}]}'
79
+ http_version:
80
+ recorded_at: Sat, 25 Jul 2015 17:59:13 GMT
81
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: get
5
+ uri: http://XXX:@dash.scrapinghub.com/api/jobs/list.json?project=1&state=pending
6
+ body:
7
+ encoding: US-ASCII
8
+ string: ''
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sat, 25 Jul 2015 18:00:23 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "count": 0, "total": 0, "jobs": []}'
36
+ http_version:
37
+ recorded_at: Sat, 25 Jul 2015 18:00:24 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 403
19
+ message: FORBIDDEN
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:10:47 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "error", "message": "Authentication failed"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 01:10:48 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,39 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: project=2&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 400
19
+ message: User 'abevoelker' doesn't have access to project 2
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:11:42 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Content-Length:
28
+ - '89'
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: UTF-8
35
+ string: '{"status": "badrequest", "message": "User ''abevoelker'' doesn''t have
36
+ access to project 2"}'
37
+ http_version:
38
+ recorded_at: Sun, 26 Jul 2015 01:11:42 GMT
39
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: add_tag=foo&project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:51:54 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "jobid": "1/1/14"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 01:51:55 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,39 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 400
19
+ message: BAD REQUEST
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:37:49 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Content-Length:
28
+ - '84'
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: UTF-8
35
+ string: '{"status": "error", "message": "Spider ''atlantic_firearms_crawl''
36
+ already scheduled"}'
37
+ http_version:
38
+ recorded_at: Sun, 26 Jul 2015 01:37:49 GMT
39
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: DOWNLOAD_DELAY=0.5&project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:58:16 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "jobid": "1/1/17"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 01:58:17 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:31:53 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "jobid": "1/1/11"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 01:31:53 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/schedule.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: priority=4&project=1&spider=atlantic_firearms_crawl
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 01:55:59 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "ok", "jobid": "1/1/16"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 01:55:59 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/jobs/stop.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: job=1%2F1%2F9&project=1
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 403
19
+ message: FORBIDDEN
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 00:06:28 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Transfer-Encoding:
28
+ - chunked
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: ASCII-8BIT
35
+ string: '{"status": "error", "message": "Authentication failed"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 00:06:28 GMT
38
+ recorded_with: VCR 2.9.3
@@ -0,0 +1,38 @@
1
+ ---
2
+ http_interactions:
3
+ - request:
4
+ method: post
5
+ uri: http://XXX:@dash.scrapinghub.com/api/jobs/stop.json
6
+ body:
7
+ encoding: UTF-8
8
+ string: job=1%2F1%2F6&project=1
9
+ headers:
10
+ Accept-Encoding:
11
+ - gzip;q=1.0,deflate;q=0.6,identity;q=0.3
12
+ Accept:
13
+ - "*/*"
14
+ User-Agent:
15
+ - Ruby
16
+ response:
17
+ status:
18
+ code: 200
19
+ message: OK
20
+ headers:
21
+ Server:
22
+ - nginx/1.8.0
23
+ Date:
24
+ - Sun, 26 Jul 2015 00:26:15 GMT
25
+ Content-Type:
26
+ - application/json
27
+ Content-Length:
28
+ - '16'
29
+ Connection:
30
+ - keep-alive
31
+ Vary:
32
+ - Cookie
33
+ body:
34
+ encoding: UTF-8
35
+ string: '{"status": "ok"}'
36
+ http_version:
37
+ recorded_at: Sun, 26 Jul 2015 00:26:16 GMT
38
+ recorded_with: VCR 2.9.3