extraloop 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,7 +4,7 @@ results = []
4
4
 
5
5
  ExtraLoop::IterativeScraper.new("https://www.google.com/search?tbm=nws&q=Egypt", :log => {
6
6
  :log_level => :debug,
7
- :appenders => [Logging.appenders.stderr ]
7
+ :appenders => [ Logging.appenders.stderr ]
8
8
 
9
9
  }).set_iteration(:start, (1..101).step(10)).
10
10
  loop_on("h3") { |nodes| nodes.map(&:parent) }.
@@ -23,7 +23,9 @@ options = {
23
23
  :log_level => :info
24
24
  }
25
25
  }
26
- request_arguments = { :params => params }
26
+ request_arguments = { :params => params, :headers => {
27
+ "User-Agent" => "ExtraLoop - ruby data extraction toolkit: http://github.com/afiore/extraloop"
28
+ }}
27
29
 
28
30
 
29
31
  #
@@ -39,9 +41,9 @@ ExtraLoop::IterativeScraper.new(api_url, options, request_arguments).
39
41
  extract(:ns).
40
42
  extract(:type).
41
43
  extract(:timestamp).
42
- on(:data, proc { |results|
44
+ on(:data) do |results|
43
45
  results.each { |record| all_results << record }
44
- }).
46
+ end.
45
47
  continue_with(:cmcontinue, ['query-continue', 'categorymembers', 'cmcontinue']).
46
48
  run()
47
49
 
@@ -29,7 +29,9 @@ class WikipediaCategoryScraper < ExtraLoop::IterativeScraper
29
29
  :format => :json,
30
30
  :log => false
31
31
  }
32
- request_arguments = { :params => params }
32
+ request_arguments = { :params => params, :headers => {
33
+ "User-Agent" => "ExtraLoop - ruby data extraction toolkit: http://github.com/afiore/extraloop"
34
+ }}
33
35
 
34
36
  super(@@api_url, options, request_arguments)
35
37
 
@@ -38,17 +40,16 @@ class WikipediaCategoryScraper < ExtraLoop::IterativeScraper
38
40
  extract(:ns).
39
41
  extract(:type).
40
42
  extract(:timestamp).
41
- on(:data, proc { |results|
43
+ on(:data) do |results|
42
44
  puts "#{"\t" * (@options[:depth] - 2).abs } #{@scraper.request_arguments[:params][:cmtitle]}"
43
45
  categories = results.select{ |record| record.ns === 14 }.each { |category| results.delete(category) }
44
46
 
45
-
46
47
  categories.each do |record|
47
48
  # Instantiate a sub scraper if the current depth is greater than zero and the category member is a sub category.
48
49
  WikipediaCategoryScraper.new(record.title, @options[:depth] - 1, @scraper.request_arguments[:params][:cmtitle] ).run unless @options[:depth] <= 0
49
50
  end
50
51
 
51
- }).
52
+ end.
52
53
  continue_with(:cmcontinue, ['query-continue', 'categorymembers', 'cmcontinue'])
53
54
  end
54
55
  end
@@ -89,8 +89,8 @@ module ExtraLoop
89
89
  # Returns itself.
90
90
  #
91
91
 
92
- def set_iteration(param, *args)
93
- #TODO: allow passing ranges as well as arrays
92
+ def set_iteration(param, *args, &block)
93
+ args << block if block
94
94
  if args.first.respond_to?(:map)
95
95
  @iteration_set = Array(args.first).map &:to_s
96
96
  else
@@ -110,7 +110,8 @@ module ExtraLoop
110
110
  #
111
111
  # Returns itself.
112
112
 
113
- def continue_with(param, *extractor_args)
113
+ def continue_with(param, *extractor_args, &block)
114
+ extractor_args << block if block
114
115
  raise Exceptions::NonGetAsyncRequestNotYetImplemented.new "the #continue_with method currently requires the 'async' option to be set to false" if @options[:async]
115
116
 
116
117
  @continue_clause_args = extractor_args
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extraloop
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-28 00:00:00.000000000Z
12
+ date: 2012-01-30 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yajl-ruby
16
- requirement: &19726880 !ruby/object:Gem::Requirement
16
+ requirement: &15579720 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 1.1.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *19726880
24
+ version_requirements: *15579720
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: nokogiri
27
- requirement: &19726420 !ruby/object:Gem::Requirement
27
+ requirement: &15579260 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 1.5.0
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *19726420
35
+ version_requirements: *15579260
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: typhoeus
38
- requirement: &19725960 !ruby/object:Gem::Requirement
38
+ requirement: &15578800 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,10 +43,10 @@ dependencies:
43
43
  version: 0.3.2
44
44
  type: :runtime
45
45
  prerelease: false
46
- version_requirements: *19725960
46
+ version_requirements: *15578800
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: logging
49
- requirement: &19725500 !ruby/object:Gem::Requirement
49
+ requirement: &15578340 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
@@ -54,10 +54,10 @@ dependencies:
54
54
  version: 0.6.1
55
55
  type: :runtime
56
56
  prerelease: false
57
- version_requirements: *19725500
57
+ version_requirements: *15578340
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: rspec
60
- requirement: &19725040 !ruby/object:Gem::Requirement
60
+ requirement: &15577880 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,10 +65,10 @@ dependencies:
65
65
  version: 2.7.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *19725040
68
+ version_requirements: *15577880
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: rr
71
- requirement: &19724580 !ruby/object:Gem::Requirement
71
+ requirement: &15577420 !ruby/object:Gem::Requirement
72
72
  none: false
73
73
  requirements:
74
74
  - - ~>
@@ -76,10 +76,10 @@ dependencies:
76
76
  version: 1.0.4
77
77
  type: :development
78
78
  prerelease: false
79
- version_requirements: *19724580
79
+ version_requirements: *15577420
80
80
  - !ruby/object:Gem::Dependency
81
81
  name: pry
82
- requirement: &19724060 !ruby/object:Gem::Requirement
82
+ requirement: &15576960 !ruby/object:Gem::Requirement
83
83
  none: false
84
84
  requirements:
85
85
  - - ~>
@@ -87,7 +87,7 @@ dependencies:
87
87
  version: 0.9.7.4
88
88
  type: :development
89
89
  prerelease: false
90
- version_requirements: *19724060
90
+ version_requirements: *15576960
91
91
  description: A Ruby library for extracting data from websites and web based APIs.
92
92
  Supports most common document formats (i.e. HTML, XML, and JSON), and comes with
93
93
  a handy mechanism for iterating over paginated datasets.