page_by_page 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0f56c80bdb70ae4aef3c770e9c0fb0554a2bad2
4
- data.tar.gz: 2bdb85e741a4de3c5d109fc6539f548e2d8f48d3
3
+ metadata.gz: ac89d6a7b6b45addd50b7738d2a8b90c6609b1f9
4
+ data.tar.gz: 3ea2de6bddb75c604dea419092b50e1f353fc4ec
5
5
  SHA512:
6
- metadata.gz: 510c3c2a34d8de3940659c798aa15758c62fd3bd7311fd14c3a09f77f0f9c9c4e4fd6fe19dfb992947e1ba07ecffc70605a82de747d8e95aa6a7ed74f76fb839
7
- data.tar.gz: 8c6ba45664bcdfe832ccd41017a27710cef380cf86c3dd77440cae0c27ecf57f38340af0df819a8fe7569fa60f308cdda6f9c00a4aa4e756bcef55ca2fa39a88
6
+ metadata.gz: 7a826438ea81f09e717674bbbc955af03676303056ad576c28905bfb74eb45a935f34e3127e85c19c0e5c31d3329a9f91b8e0fbdd87fc76a65b76ffa19dd41cd
7
+ data.tar.gz: 2706c0b89de6f87b454061e9372db5f5cb1885d5cd735df4d587421a6aa464576b64e948cbc1f5d22e91f3814a3d9b3935410a96d1977d6fab1becf0652aa54c
data/lib/page_by_page.rb CHANGED
@@ -1,7 +1,9 @@
1
1
  require 'page_by_page/version'
2
2
  require 'page_by_page/enum'
3
+ require 'page_by_page/mutex_enum'
3
4
  require 'nokogiri'
4
5
  require 'open-uri'
6
+ require 'erb'
5
7
 
6
8
  class PageByPage
7
9
 
@@ -36,23 +38,52 @@ class PageByPage
36
38
  @to = n
37
39
  end
38
40
 
41
+ def threads n
42
+ @threads = n
43
+ end
44
+
39
45
  def fetch
40
- enum = Enum.new options
46
+ nodes_2d =
47
+ unless @threads
48
+ @enum = Enum.new options
49
+ _fetch
50
+ else
51
+ @enum = MutexEnum.new options
52
+ parallel_fetch
53
+ end
54
+ nodes_2d.flatten
55
+ end
56
+
57
+ private
58
+
59
+ def _fetch
41
60
  items, all_items = [nil], []
42
61
  catch :no_more do
43
62
  until items.empty?
44
- n = enum.next
63
+ n = @enum.next
45
64
  break if n > limit
46
65
  url = @tmpl.result binding
47
66
  doc = parse url
48
67
  items = doc.css @selector
49
- all_items << items
68
+ all_items[n] = items
50
69
  end
51
70
  end
52
- all_items.flatten
71
+ all_items
53
72
  end
54
73
 
55
- private
74
+ def parallel_fetch
75
+ ts = @threads.times.map do |n|
76
+ Thread.new do
77
+ Thread.current[:sub] = _fetch
78
+ end
79
+ end
80
+ ts.each_with_object([]) do |t, pages|
81
+ t.join
82
+ t[:sub].each_with_index do |items, i|
83
+ pages[i] = items if items
84
+ end
85
+ end
86
+ end
56
87
 
57
88
  def parse url
58
89
  page = open(url)
data/lib/page_by_page/enum.rb CHANGED
@@ -1,20 +1,12 @@
1
- require 'forwardable'
2
- require 'erb'
3
-
4
1
  class PageByPage
5
2
  class Enum
6
- extend Forwardable
7
-
8
- def_delegator :@enum, :next
9
3
 
10
4
  def initialize from: 1, step: 1
11
- @enum = Enumerator.new do |yielder|
12
- n = from
13
- loop do
14
- yielder.yield n
15
- n = n + step
16
- end
17
- end
5
+ @enum = (from..Float::INFINITY).step(step).lazy.map(&:to_i).to_enum
6
+ end
7
+
8
+ def next
9
+ @enum.next
18
10
  end
19
11
 
20
12
  end
data/lib/page_by_page/mutex_enum.rb ADDED
@@ -0,0 +1,22 @@
1
+ require 'page_by_page/enum'
2
+
3
+ class PageByPage
4
+ class MutexEnum < Enum
5
+
6
+ def initialize from: 1, step: 1
7
+ super
8
+ @q = SizedQueue.new 10
9
+ Thread.new do
10
+ loop do
11
+ @q << @enum.next
12
+ sleep 0.1
13
+ end
14
+ end
15
+ end
16
+
17
+ def next
18
+ @q.deq
19
+ end
20
+
21
+ end
22
+ end
data/lib/page_by_page/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class PageByPage
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: page_by_page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ken
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-01 00:00:00.000000000 Z
11
+ date: 2017-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -84,6 +84,7 @@ files:
84
84
  - bin/setup
85
85
  - lib/page_by_page.rb
86
86
  - lib/page_by_page/enum.rb
87
+ - lib/page_by_page/mutex_enum.rb
87
88
  - lib/page_by_page/version.rb
88
89
  - page_by_page.gemspec
89
90
  homepage: https://github.com/turnon/page_by_page