gh-archive 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/gh-archive.rb +25 -18
  3. metadata +10 -10
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c90ec7c3f14e2f57de8a145ad50c1f7730869c5aad0b1de33286accec99642c7
4
- data.tar.gz: a16c29db393499905695d411a62ca0dfb6e319d7fd509c104d41b2d0959b3414
3
+ metadata.gz: 0a12aa7954977bd9a8755f560530dd8db96b94e06f486e0a4155838ceb55e7f3
4
+ data.tar.gz: 8fc967a2e7a9bb0848c6e62a30231180c9a90ee96d72d6524c6b778ce54cbcbd
5
5
  SHA512:
6
- metadata.gz: f6a9feafaa7d0d06f75c489b147b2fc9b5485e88a28c15f98b1081c8b1f05fffc8218253ca817b0ecf64c1050f3e63ed4e1ad2d6aec9df5a4d362cf5ed2832bc
7
- data.tar.gz: 1bfa5ab2dbccc74bd2b162066bab6874fc3a2fc08b6d2ad9ec02c05a73078ee84abe1d069eb2b51b491134b35ea4ee0724b81aa3ffbd845b9997ba3860639c05
6
+ metadata.gz: 6f7c2ac9526de7e352825ed26c2ec3fc5375000e399529f45a07db7fbdaeef361f01a4782eb825f286e55311c3fc81dda7ecbcd509519e78da22147d1998771a
7
+ data.tar.gz: ab2b5f37f433ccde1d92406bd921c0c7a3a6ccaab41a3c11347a113c8c3db63beacf174ba6368be227302500b83bba59415f2cd85df59f9aff3f7f86f5c8b69c
data/lib/gh-archive.rb CHANGED
@@ -3,6 +3,7 @@ require 'json'
3
3
  require 'open-uri'
4
4
  require 'zlib'
5
5
  require 'logger'
6
+ require 'tmpdir'
6
7
 
7
8
  module GHAUtils
8
9
  def get_gha_filename(date)
@@ -101,11 +102,12 @@ class GHAProvider
101
102
  end
102
103
 
103
104
  class OnlineGHAProvider < GHAProvider
104
- def initialize(max_retries = 3, proactive = false)
105
+ def initialize(max_retries = 3, proactive = false, proactive_pool_size = 10)
105
106
  super()
106
107
 
107
108
  @max_retries = max_retries
108
109
  @proactive = proactive
110
+ @proactive_pool_size = proactive_pool_size
109
111
  @cache = Cache.new
110
112
  end
111
113
 
@@ -115,7 +117,7 @@ class OnlineGHAProvider < GHAProvider
115
117
  filename = self.get_gha_filename(current_time)
116
118
 
117
119
  if @cache.has?(filename)
118
- result = self.read_gha_file(@cache.get(filename))
120
+ result = @cache.get(filename)
119
121
  else
120
122
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
121
123
  # Save to cache
@@ -123,7 +125,7 @@ class OnlineGHAProvider < GHAProvider
123
125
  end
124
126
  end
125
127
  rescue
126
- @logger.warning($!)
128
+ @logger.warn($!)
127
129
  end
128
130
  end
129
131
 
@@ -136,10 +138,12 @@ class OnlineGHAProvider < GHAProvider
136
138
  filename = self.get_gha_filename(current_time)
137
139
 
138
140
  URI.open("http://data.gharchive.org/#{filename}") do |gz|
139
- @cache.put(filename, gz.read)
141
+ content = self.read_gha_file(gz)
142
+ @cache.put(filename, content)
140
143
  return
141
144
  end
142
145
  rescue
146
+ p $!
143
147
  end
144
148
  end
145
149
  end
@@ -148,17 +152,19 @@ class OnlineGHAProvider < GHAProvider
148
152
  if @proactive
149
153
  @logger.info("Proactive download thread started")
150
154
  Thread.start do
155
+ pool = []
151
156
  self.each_date(from, to) do |current_date|
152
- self.cache(current_date)
153
- @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
154
-
155
- if @cache.full?
156
- @logger.info("Full cache. Waiting...")
157
+ while pool.size > @proactive_pool_size || @cache.full?
158
+ pool.delete_if { |t| !t.alive? }
159
+ sleep 0.1
157
160
  end
158
161
 
159
- while @cache.full?
160
- sleep 1
162
+ pool << Thread.start do
163
+ self.cache(current_date)
164
+ @logger.info("Proactively cached #{current_date}. Cache size: #{@cache.size}")
161
165
  end
166
+
167
+ pool.delete_if { |t| !t.alive? }
162
168
  end
163
169
  end
164
170
  end
@@ -175,21 +181,22 @@ class OnlineGHAProvider < GHAProvider
175
181
  end
176
182
 
177
183
  def put(name, content)
178
- File.open("#@folder/#{name}", 'w') do |f|
179
- f << content
180
- end
184
+ #filename = "#@folder/#{name}"
185
+ #File.open(filename, 'w') do |f|
186
+ #f << content
187
+ #end
181
188
 
182
189
  @mutex.synchronize do
183
- @cache[name] = value
190
+ @cache[name] = content
184
191
  end
185
192
  end
186
193
 
187
194
  def get(name)
188
195
  @mutex.synchronize do
189
- return File.read(@cache[name])
196
+ return @cache.delete(name)
190
197
  end
191
198
  ensure
192
- self.unload(name)
199
+ #self.unload(name)
193
200
  end
194
201
 
195
202
  def unload(name)
@@ -204,7 +211,7 @@ class OnlineGHAProvider < GHAProvider
204
211
 
205
212
  def size
206
213
  @mutex.synchronize do
207
- @cache.size
214
+ return @cache.size
208
215
  end
209
216
  end
210
217
 
metadata CHANGED
@@ -1,33 +1,33 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gh-archive
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Simone Scalabrino
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-01-26 00:00:00.000000000 Z
11
+ date: 2021-08-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: code-assertions
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
19
  version: 1.1.2
20
- - - "~>"
20
+ - - ">="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 1.1.2
23
23
  type: :runtime
24
24
  prerelease: false
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - ">="
27
+ - - "~>"
28
28
  - !ruby/object:Gem::Version
29
29
  version: 1.1.2
30
- - - "~>"
30
+ - - ">="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 1.1.2
33
33
  description: Download and analyze the GitHub events stored at GitHub archive
@@ -41,7 +41,7 @@ homepage: https://github.com/intersimone999/gh-archive
41
41
  licenses:
42
42
  - GPL-3.0-only
43
43
  metadata: {}
44
- post_install_message:
44
+ post_install_message:
45
45
  rdoc_options: []
46
46
  require_paths:
47
47
  - lib
@@ -56,8 +56,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
56
  - !ruby/object:Gem::Version
57
57
  version: '0'
58
58
  requirements: []
59
- rubygems_version: 3.0.3
60
- signing_key:
59
+ rubygems_version: 3.2.21
60
+ signing_key:
61
61
  specification_version: 4
62
62
  summary: GitHub Archive mining utility
63
63
  test_files: []