page_by_page 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b503de846c6f14e44ebb68d6da30f0647e3a9e42
4
- data.tar.gz: 0b74d6adf88048ed8a78441ec24fad1f686c8ac7
3
+ metadata.gz: e272639f27ba7c77e9c211f7189099dedade9d8f
4
+ data.tar.gz: b4571013bb77691327b37fe8a646cf162469d394
5
5
  SHA512:
6
- metadata.gz: 032d1274977d459339c51d44648b56ba3eb066818137866ed8c80753f0b9009bebf40a59ef614601034e56e02d466a8636cbf5af86bb9b68309da20c01ba09bd
7
- data.tar.gz: 70f97621564edf8ccbade3a23131ca53722f5e7357382d1803ffdf5e17cb1a359e397ac2d6d32c97d8411bca1391a2bc1c51aae6e9378ae1dd508cf50d254f40
6
+ metadata.gz: 12888cf1ddf115bfe0866339fc1c574176c757bfa13154a34d0c1c777c709c2ef083037018f08584a6550c14d9e22522a1cbed0e2dc7f981759af4c9bf28a9a9
7
+ data.tar.gz: 53de9357b1c06ce217e92ff3619b45455280fe351b281e37450ad17b7041737985a187cace0305dda93a62d284dc588393ab4de9fb1aa43e575dc41894a4ff8b
data/README.md CHANGED
@@ -24,6 +24,11 @@ Or install it yourself as:
24
24
  nodes = PageByPage.fetch do
25
25
  url 'https://book.douban.com/subject/25846075/comments/hot?p=<%= n %>'
26
26
  selector '.comment-item'
27
+ # from 2
28
+ # step 2
29
+ # to 100
30
+ # threads 4
31
+ # no_progress
27
32
  end
28
33
  ```
29
34
 
data/lib/page_by_page.rb CHANGED
@@ -15,6 +15,8 @@ class PageByPage
15
15
  end
16
16
 
17
17
  def initialize &block
18
+ @from, @step, @to = 1, 1, Float::INFINITY
19
+ @progress = {}
18
20
  instance_eval &block
19
21
  end
20
22
 
@@ -42,15 +44,20 @@ class PageByPage
42
44
  @threads = n
43
45
  end
44
46
 
47
+ def no_progress
48
+ @progress = nil
49
+ end
50
+
45
51
  def fetch
46
52
  nodes_2d =
47
- unless @threads
53
+ unless defined? @threads
48
54
  @enum = Enum.new options
49
55
  _fetch
50
56
  else
51
57
  @enum = MutexEnum.new options
52
58
  parallel_fetch
53
59
  end
60
+ puts if @progress
54
61
  nodes_2d.reject(&:nil?).flatten
55
62
  end
56
63
 
@@ -66,6 +73,7 @@ class PageByPage
66
73
  doc = parse url
67
74
  items = doc.css @selector
68
75
  pages[n] = items
76
+ update_progress Thread.current, n if @progress
69
77
  end
70
78
  end
71
79
  pages
@@ -97,14 +105,16 @@ class PageByPage
97
105
  end
98
106
 
99
107
  def options
100
- opt = {}
101
- opt[:from] = @from || 1
102
- opt[:step] = @step || 1
103
- opt
108
+ {from: @from, step: @step}
104
109
  end
105
110
 
106
111
  def limit
107
- @to || Float::INFINITY
112
+ @to ||= Float::INFINITY
113
+ end
114
+
115
+ def update_progress thread, page_num
116
+ @progress[thread] = page_num
117
+ printf "\r%s => %s", Time.now.strftime('%F %T'), @progress.values.sort
108
118
  end
109
119
 
110
120
  end
@@ -1,3 +1,3 @@
1
1
  class PageByPage
2
- VERSION = "0.1.7"
2
+ VERSION = "0.1.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: page_by_page
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.7
4
+ version: 0.1.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - ken
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-01-04 00:00:00.000000000 Z
11
+ date: 2017-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -108,7 +108,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
108
108
  version: '0'
109
109
  requirements: []
110
110
  rubyforge_project:
111
- rubygems_version: 2.6.7
111
+ rubygems_version: 2.6.8
112
112
  signing_key:
113
113
  specification_version: 4
114
114
  summary: scrape page by page , according to url pattern