page_by_page 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/page_by_page.rb +36 -5
- data/lib/page_by_page/enum.rb +5 -13
- data/lib/page_by_page/mutex_enum.rb +22 -0
- data/lib/page_by_page/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ac89d6a7b6b45addd50b7738d2a8b90c6609b1f9
|
4
|
+
data.tar.gz: 3ea2de6bddb75c604dea419092b50e1f353fc4ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7a826438ea81f09e717674bbbc955af03676303056ad576c28905bfb74eb45a935f34e3127e85c19c0e5c31d3329a9f91b8e0fbdd87fc76a65b76ffa19dd41cd
|
7
|
+
data.tar.gz: 2706c0b89de6f87b454061e9372db5f5cb1885d5cd735df4d587421a6aa464576b64e948cbc1f5d22e91f3814a3d9b3935410a96d1977d6fab1becf0652aa54c
|
data/lib/page_by_page.rb
CHANGED
@@ -1,7 +1,9 @@
|
|
1
1
|
require 'page_by_page/version'
|
2
2
|
require 'page_by_page/enum'
|
3
|
+
require 'page_by_page/mutex_enum'
|
3
4
|
require 'nokogiri'
|
4
5
|
require 'open-uri'
|
6
|
+
require 'erb'
|
5
7
|
|
6
8
|
class PageByPage
|
7
9
|
|
@@ -36,23 +38,52 @@ class PageByPage
|
|
36
38
|
@to = n
|
37
39
|
end
|
38
40
|
|
41
|
+
def threads n
|
42
|
+
@threads = n
|
43
|
+
end
|
44
|
+
|
39
45
|
def fetch
|
40
|
-
|
46
|
+
nodes_2d =
|
47
|
+
unless @threads
|
48
|
+
@enum = Enum.new options
|
49
|
+
_fetch
|
50
|
+
else
|
51
|
+
@enum = MutexEnum.new options
|
52
|
+
parallel_fetch
|
53
|
+
end
|
54
|
+
nodes_2d.flatten
|
55
|
+
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def _fetch
|
41
60
|
items, all_items = [nil], []
|
42
61
|
catch :no_more do
|
43
62
|
until items.empty?
|
44
|
-
n = enum.next
|
63
|
+
n = @enum.next
|
45
64
|
break if n > limit
|
46
65
|
url = @tmpl.result binding
|
47
66
|
doc = parse url
|
48
67
|
items = doc.css @selector
|
49
|
-
all_items
|
68
|
+
all_items[n] = items
|
50
69
|
end
|
51
70
|
end
|
52
|
-
all_items
|
71
|
+
all_items
|
53
72
|
end
|
54
73
|
|
55
|
-
|
74
|
+
def parallel_fetch
|
75
|
+
ts = @threads.times.map do |n|
|
76
|
+
Thread.new do
|
77
|
+
Thread.current[:sub] = _fetch
|
78
|
+
end
|
79
|
+
end
|
80
|
+
ts.each_with_object([]) do |t, pages|
|
81
|
+
t.join
|
82
|
+
t[:sub].each_with_index do |items, i|
|
83
|
+
pages[i] = items if items
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|
56
87
|
|
57
88
|
def parse url
|
58
89
|
page = open(url)
|
data/lib/page_by_page/enum.rb
CHANGED
@@ -1,20 +1,12 @@
|
|
1
|
-
require 'forwardable'
|
2
|
-
require 'erb'
|
3
|
-
|
4
1
|
class PageByPage
|
5
2
|
class Enum
|
6
|
-
extend Forwardable
|
7
|
-
|
8
|
-
def_delegator :@enum, :next
|
9
3
|
|
10
4
|
def initialize from: 1, step: 1
|
11
|
-
@enum =
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
end
|
17
|
-
end
|
5
|
+
@enum = (from..Float::INFINITY).step(step).lazy.map(&:to_i).to_enum
|
6
|
+
end
|
7
|
+
|
8
|
+
def next
|
9
|
+
@enum.next
|
18
10
|
end
|
19
11
|
|
20
12
|
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'page_by_page/enum'
|
2
|
+
|
3
|
+
class PageByPage
|
4
|
+
class MutexEnum < Enum
|
5
|
+
|
6
|
+
def initialize from: 1, step: 1
|
7
|
+
super
|
8
|
+
@q = SizedQueue.new 10
|
9
|
+
Thread.new do
|
10
|
+
loop do
|
11
|
+
@q << @enum.next
|
12
|
+
sleep 0.1
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def next
|
18
|
+
@q.deq
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
data/lib/page_by_page/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: page_by_page
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ken
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-01-
|
11
|
+
date: 2017-01-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -84,6 +84,7 @@ files:
|
|
84
84
|
- bin/setup
|
85
85
|
- lib/page_by_page.rb
|
86
86
|
- lib/page_by_page/enum.rb
|
87
|
+
- lib/page_by_page/mutex_enum.rb
|
87
88
|
- lib/page_by_page/version.rb
|
88
89
|
- page_by_page.gemspec
|
89
90
|
homepage: https://github.com/turnon/page_by_page
|