gh-archive 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/gh-archive.rb +25 -18
  3. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c90ec7c3f14e2f57de8a145ad50c1f7730869c5aad0b1de33286accec99642c7
4
- data.tar.gz: a16c29db393499905695d411a62ca0dfb6e319d7fd509c104d41b2d0959b3414
3
+ metadata.gz: 0a12aa7954977bd9a8755f560530dd8db96b94e06f486e0a4155838ceb55e7f3
4
+ data.tar.gz: 8fc967a2e7a9bb0848c6e62a30231180c9a90ee96d72d6524c6b778ce54cbcbd
5
5
  SHA512:
6
- metadata.gz: f6a9feafaa7d0d06f75c489b147b2fc9b5485e88a28c15f98b1081c8b1f05fffc8218253ca817b0ecf64c1050f3e63ed4e1ad2d6aec9df5a4d362cf5ed2832bc
7
- data.tar.gz: 1bfa5ab2dbccc74bd2b162066bab6874fc3a2fc08b6d2ad9ec02c05a73078ee84abe1d069eb2b51b491134b35ea4ee0724b81aa3ffbd845b9997ba3860639c05
6
+ metadata.gz: 6f7c2ac9526de7e352825ed26c2ec3fc5375000e399529f45a07db7fbdaeef361f01a4782eb825f286e55311c3fc81dda7ecbcd509519e78da22147d1998771a
7
+ data.tar.gz: ab2b5f37f433ccde1d92406bd921c0c7a3a6ccaab41a3c11347a113c8c3db63beacf174ba6368be227302500b83bba59415f2cd85df59f9aff3f7f86f5c8b69c
data/lib/gh-archive.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
3
3
  require 'open-uri'
4
4
  require 'zlib'
5
5
  require 'logger'
6
+ require 'tmpdir'
6
7
 
7
8
  module GHAUtils
8
9
  def get_gha_filename(date)
@@ -101,11 +102,12 @@ class GHAProvider
101
102
  end
102
103
 
103
104
  class OnlineGHAProvider < GHAProvider
104
- def initialize(max_retries = 3, proactive = false)
105
+ def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
105
106
  super()
106
107
 
107
108
  @max_retries = max_retries
108
109
  @proactive = proactive
110
+ @proactive_pool_size = proactive_pool_size
109
111
  @cache = Cache.new
110
112
  end
111
113
 
@@ -115,7 +117,7 @@ class OnlineGHAProvider < GHAProvider
115
117
  filename = self.get_gha_filename(current_time)
116
118
 
117
119
  if @cache.has?(filename)
118
- result = self.read_gha_file(@cache.get(filename))
120
+ result = @cache.get(filename)
119
121
  else
120
122
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
121
123
  # Save to cache
@@ -123,7 +125,7 @@ class OnlineGHAProvider < GHAProvider
123
125
  end
124
126
  end
125
127
  rescue
126
- @logger.warning($!)
128
+ @logger.warn($!)
127
129
  end
128
130
  end
129
131
 
@@ -136,10 +138,12 @@ class OnlineGHAProvider < GHAProvider
136
138
  filename = self.get_gha_filename(current_time)
137
139
 
138
140
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
139
- @cache.put(filename, gz.read)
141
+ content = self.read_gha_file(gz)
142
+ @cache.put(filename, content)
140
143
  return
141
144
  end
142
145
  rescue
146
+ p $!
143
147
  end
144
148
  end
145
149
  end
@@ -148,17 +152,19 @@ class OnlineGHAProvider < GHAProvider
148
152
  if @proactive
149
153
  @logger.info("Proactive download thread started")
150
154
  Thread.start do
155
+ pool = []
151
156
  self.each_date(from, to) do |current_date|
152
- self.cache(current_date)
153
- @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
154
-
155
- if @cache.full?
156
- @logger.info("Full cache. Waiting...")
157
+ while pool.size > @proactive_pool_size || @cache.full?
158
+ pool.delete_if { |t| !t.alive? }
159
+ sleep 0.1
157
160
  end
158
161
 
159
- while @cache.full?
160
- sleep 1
162
+ pool << Thread.start do
163
+ self.cache(current_date)
164
+ @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
161
165
  end
166
+
167
+ pool.delete_if { |t| !t.alive? }
162
168
  end
163
169
  end
164
170
  end
@@ -175,21 +181,22 @@ class OnlineGHAProvider < GHAProvider
175
181
  end
176
182
 
177
183
  def put(name, content)
178
- File.open("#@folder/#{name}", 'w') do |f|
179
- f << content
180
- end
184
+ #filename = "#@folder/#{name}"
185
+ #File.open(filename, 'w') do |f|
186
+ #f << content
187
+ #end
181
188
 
182
189
  @mutex.synchronize do
183
- @cache[name] = value
190
+ @cache[name] = content
184
191
  end
185
192
  end
186
193
 
187
194
  def get(name)
188
195
  @mutex.synchronize do
189
- return File.read(@cache[name])
196
+ return @cache.delete(name)
190
197
  end
191
198
  ensure
192
- self.unload(name)
199
+ #self.unload(name)
193
200
  end
194
201
 
195
202
  def unload(name)
@@ -204,7 +211,7 @@ class OnlineGHAProvider < GHAProvider
204
211
 
205
212
  def size
206
213
  @mutex.synchronize do
207
- @cache.size
214
+ return @cache.size
208
215
  end
209
216
  end
210
217
 
metadata CHANGED
@@ -1,33 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-26 00:00:00.000000000 Z
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 1.1.2
20
- - - "~>"
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 1.1.2
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - ">="
27
+ - - "~>"
28
28
  - !ruby/object:Gem::Version
29
29
  version: 1.1.2
30
- - - "~>"
30
+ - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 1.1.2
33
33
  description: Download and analyze the GitHub events stored at GitHub archive
@@ -41,7 +41,7 @@ homepage: https://github.com/intersimone999/gh-archive
41
41
  licenses:
42
42
  - GPL-3.0-only
43
43
  metadata: {}
44
- post_install_message:
44
+ post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
47
47
  - lib
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  requirements: []
59
- rubygems_version: 3.0.3
60
- signing_key:
59
+ rubygems_version: 3.2.21
60
+ signing_key:
61
61
  specification_version: 4
62
62
  summary: GitHub Archive mining utility
63
63
  test_files: []