page_by_page 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b0f56c80bdb70ae4aef3c770e9c0fb0554a2bad2
4
- data.tar.gz: 2bdb85e741a4de3c5d109fc6539f548e2d8f48d3
3
+ metadata.gz: ac89d6a7b6b45addd50b7738d2a8b90c6609b1f9
4
+ data.tar.gz: 3ea2de6bddb75c604dea419092b50e1f353fc4ec
5
5
  SHA512:
6
- metadata.gz: 510c3c2a34d8de3940659c798aa15758c62fd3bd7311fd14c3a09f77f0f9c9c4e4fd6fe19dfb992947e1ba07ecffc70605a82de747d8e95aa6a7ed74f76fb839
7
- data.tar.gz: 8c6ba45664bcdfe832ccd41017a27710cef380cf86c3dd77440cae0c27ecf57f38340af0df819a8fe7569fa60f308cdda6f9c00a4aa4e756bcef55ca2fa39a88
6
+ metadata.gz: 7a826438ea81f09e717674bbbc955af03676303056ad576c28905bfb74eb45a935f34e3127e85c19c0e5c31d3329a9f91b8e0fbdd87fc76a65b76ffa19dd41cd
7
+ data.tar.gz: 2706c0b89de6f87b454061e9372db5f5cb1885d5cd735df4d587421a6aa464576b64e948cbc1f5d22e91f3814a3d9b3935410a96d1977d6fab1becf0652aa54c
data/lib/page_by_page.rb CHANGED
@@ -1,7 +1,9 @@
1
1
  require 'page_by_page/version'
2
2
  require 'page_by_page/enum'
3
+ require 'page_by_page/mutex_enum'
3
4
  require 'nokogiri'
4
5
  require 'open-uri'
6
+ require 'erb'
5
7
 
6
8
  class PageByPage
7
9
 
@@ -36,23 +38,52 @@ class PageByPage
36
38
  @to = n
37
39
  end
38
40
 
41
+ def threads n
42
+ @threads = n
43
+ end
44
+
39
45
  def fetch
40
- enum = Enum.new options
46
+ nodes_2d =
47
+ unless @threads
48
+ @enum = Enum.new options
49
+ _fetch
50
+ else
51
+ @enum = MutexEnum.new options
52
+ parallel_fetch
53
+ end
54
+ nodes_2d.flatten
55
+ end
56
+
57
+ private
58
+
59
+ def _fetch
41
60
  items, all_items = [nil], []
42
61
  catch :no_more do
43
62
  until items.empty?
44
- n = enum.next
63
+ n = @enum.next
45
64
  break if n > limit
46
65
  url = @tmpl.result binding
47
66
  doc = parse url
48
67
  items = doc.css @selector
49
- all_items << items
68
+ all_items[n] = items
50
69
  end
51
70
  end
52
- all_items.flatten
71
+ all_items
53
72
  end
54
73
 
55
- private
74
+ def parallel_fetch
75
+ ts = @threads.times.map do |n|
76
+ Thread.new do
77
+ Thread.current[:sub] = _fetch
78
+ end
79
+ end
80
+ ts.each_with_object([]) do |t, pages|
81
+ t.join
82
+ t[:sub].each_with_index do |items, i|
83
+ pages[i] = items if items
84
+ end
85
+ end
86
+ end
56
87
 
57
88
  def parse url
58
89
  page = open(url)
@@ -1,20 +1,12 @@
1
- require 'forwardable'
2
- require 'erb'
3
-
4
1
  class PageByPage
5
2
  class Enum
6
- extend Forwardable
7
-
8
- def_delegator :@enum, :next
9
3
 
10
4
  def initialize from: 1, step: 1
11
- @enum = Enumerator.new do |yielder|
12
- n = from
13
- loop do
14
- yielder.yield n
15
- n = n + step
16
- end
17
- end
5
+ @enum = (from..Float::INFINITY).step(step).lazy.map(&:to_i).to_enum
6
+ end
7
+
8
+ def next
9
+ @enum.next
18
10
  end
19
11
 
20
12
  end
@@ -0,0 +1,22 @@
1
+ require 'page_by_page/enum'
2
+
3
+ class PageByPage
4
+ class MutexEnum < Enum
5
+
6
+ def initialize from: 1, step: 1
7
+ super
8
+ @q = SizedQueue.new 10
9
+ Thread.new do
10
+ loop do
11
+ @q << @enum.next
12
+ sleep 0.1
13
+ end
14
+ end
15
+ end
16
+
17
+ def next
18
+ @q.deq
19
+ end
20
+
21
+ end
22
+ end
@@ -1,3 +1,3 @@
1
1
  class PageByPage
2
- VERSION = "0.1.5"
2
+ VERSION = "0.1.6"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: page_by_page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ken
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-01 00:00:00.000000000 Z
11
+ date: 2017-01-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -84,6 +84,7 @@ files:
84
84
  - bin/setup
85
85
  - lib/page_by_page.rb
86
86
  - lib/page_by_page/enum.rb
87
+ - lib/page_by_page/mutex_enum.rb
87
88
  - lib/page_by_page/version.rb
88
89
  - page_by_page.gemspec
89
90
  homepage: https://github.com/turnon/page_by_page