page_by_page 0.1.7 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b503de846c6f14e44ebb68d6da30f0647e3a9e42
4
- data.tar.gz: 0b74d6adf88048ed8a78441ec24fad1f686c8ac7
3
+ metadata.gz: e272639f27ba7c77e9c211f7189099dedade9d8f
4
+ data.tar.gz: b4571013bb77691327b37fe8a646cf162469d394
5
5
  SHA512:
6
- metadata.gz: 032d1274977d459339c51d44648b56ba3eb066818137866ed8c80753f0b9009bebf40a59ef614601034e56e02d466a8636cbf5af86bb9b68309da20c01ba09bd
7
- data.tar.gz: 70f97621564edf8ccbade3a23131ca53722f5e7357382d1803ffdf5e17cb1a359e397ac2d6d32c97d8411bca1391a2bc1c51aae6e9378ae1dd508cf50d254f40
6
+ metadata.gz: 12888cf1ddf115bfe0866339fc1c574176c757bfa13154a34d0c1c777c709c2ef083037018f08584a6550c14d9e22522a1cbed0e2dc7f981759af4c9bf28a9a9
7
+ data.tar.gz: 53de9357b1c06ce217e92ff3619b45455280fe351b281e37450ad17b7041737985a187cace0305dda93a62d284dc588393ab4de9fb1aa43e575dc41894a4ff8b
data/README.md CHANGED
@@ -24,6 +24,11 @@ Or install it yourself as:
24
24
  nodes = PageByPage.fetch do
25
25
  url 'https://book.douban.com/subject/25846075/comments/hot?p=<%= n %>'
26
26
  selector '.comment-item'
27
+ # from 2
28
+ # step 2
29
+ # to 100
30
+ # threads 4
31
+ # no_progress
27
32
  end
28
33
  ```
29
34
 
data/lib/page_by_page.rb CHANGED
@@ -15,6 +15,8 @@ class PageByPage
15
15
  end
16
16
 
17
17
  def initialize &block
18
+ @from, @step, @to = 1, 1, Float::INFINITY
19
+ @progress = {}
18
20
  instance_eval &block
19
21
  end
20
22
 
@@ -42,15 +44,20 @@ class PageByPage
42
44
  @threads = n
43
45
  end
44
46
 
47
+ def no_progress
48
+ @progress = nil
49
+ end
50
+
45
51
  def fetch
46
52
  nodes_2d =
47
- unless @threads
53
+ unless defined? @threads
48
54
  @enum = Enum.new options
49
55
  _fetch
50
56
  else
51
57
  @enum = MutexEnum.new options
52
58
  parallel_fetch
53
59
  end
60
+ puts if @progress
54
61
  nodes_2d.reject(&:nil?).flatten
55
62
  end
56
63
 
@@ -66,6 +73,7 @@ class PageByPage
66
73
  doc = parse url
67
74
  items = doc.css @selector
68
75
  pages[n] = items
76
+ update_progress Thread.current, n if @progress
69
77
  end
70
78
  end
71
79
  pages
@@ -97,14 +105,16 @@ class PageByPage
97
105
  end
98
106
 
99
107
  def options
100
- opt = {}
101
- opt[:from] = @from || 1
102
- opt[:step] = @step || 1
103
- opt
108
+ {from: @from, step: @step}
104
109
  end
105
110
 
106
111
  def limit
107
- @to || Float::INFINITY
112
+ @to ||= Float::INFINITY
113
+ end
114
+
115
+ def update_progress thread, page_num
116
+ @progress[thread] = page_num
117
+ printf "\r%s => %s", Time.now.strftime('%F %T'), @progress.values.sort
108
118
  end
109
119
 
110
120
  end
@@ -1,3 +1,3 @@
1
1
  class PageByPage
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: page_by_page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - ken
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-04 00:00:00.000000000 Z
11
+ date: 2017-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  requirements: []
110
110
  rubyforge_project:
111
- rubygems_version: 2.6.7
111
+ rubygems_version: 2.6.8
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: scrape page by page , according to url pattern