license_auto 0.1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +47 -0
- data/.travis.yml +9 -0
- data/CHANGELOG.md +17 -0
- data/Gemfile +27 -0
- data/Gemfile.lock +103 -0
- data/LICENSE +23 -0
- data/README.md +136 -0
- data/Rakefile +20 -0
- data/examples/get_license_files_of_bundler.rb +12 -0
- data/lib/api.rb +60 -0
- data/lib/api/bitbucket.rb +142 -0
- data/lib/api/code_google_com.rb +66 -0
- data/lib/api/excel_export.rb +189 -0
- data/lib/api/gem_data.rb +30 -0
- data/lib/api/git_kernel_org.rb +59 -0
- data/lib/api/github.rb +376 -0
- data/lib/api/go_pkg_in.rb +41 -0
- data/lib/api/golang_org.rb +63 -0
- data/lib/api/google_source_com.rb +68 -0
- data/lib/api/gradle2.rb +41 -0
- data/lib/api/helper.rb +26 -0
- data/lib/api/j_center.rb +23 -0
- data/lib/api/maven_central_repository.rb +192 -0
- data/lib/api/mq.rb +30 -0
- data/lib/api/npm_registry.rb +169 -0
- data/lib/api/pattern.rb +33 -0
- data/lib/api/remote_source_package.rb +319 -0
- data/lib/api/spider.rb +47 -0
- data/lib/cloner.rb +154 -0
- data/lib/db.rb +267 -0
- data/lib/license_auto.rb +20 -0
- data/lib/license_auto/config/config.rb +52 -0
- data/lib/license_auto/errors.rb +0 -0
- data/lib/license_auto/license/frequency.rb +30 -0
- data/lib/license_auto/license/similarity.rb +247 -0
- data/lib/license_auto/license/templates/AFL2.0.txt +43 -0
- data/lib/license_auto/license/templates/AFL2.1.txt +47 -0
- data/lib/license_auto/license/templates/AFL3.0.txt +45 -0
- data/lib/license_auto/license/templates/AGPL3.0.txt +236 -0
- data/lib/license_auto/license/templates/APSL 2.0.txt +100 -0
- data/lib/license_auto/license/templates/Apache1.0.txt +18 -0
- data/lib/license_auto/license/templates/Apache1.1.txt +18 -0
- data/lib/license_auto/license/templates/Apache2.0.txt +201 -0
- data/lib/license_auto/license/templates/Artistic1.0.txt +45 -0
- data/lib/license_auto/license/templates/Artistic2.0.txt +70 -0
- data/lib/license_auto/license/templates/BSD.txt +29 -0
- data/lib/license_auto/license/templates/CDDL1.0.txt +1 -0
- data/lib/license_auto/license/templates/CDDL1.1.txt +209 -0
- data/lib/license_auto/license/templates/CPL1.0.txt +211 -0
- data/lib/license_auto/license/templates/CPOL 1.02.txt +41 -0
- data/lib/license_auto/license/templates/ClarifiedArtistic.txt +140 -0
- data/lib/license_auto/license/templates/Creative Commons1.0.txt +28 -0
- data/lib/license_auto/license/templates/EPL1.0.txt +70 -0
- data/lib/license_auto/license/templates/ERLANG1.1.txt +286 -0
- data/lib/license_auto/license/templates/GFDL1.1.txt +101 -0
- data/lib/license_auto/license/templates/GFDL1.2.txt +108 -0
- data/lib/license_auto/license/templates/GFDL1.3.txt +116 -0
- data/lib/license_auto/license/templates/GPL1.0.txt +105 -0
- data/lib/license_auto/license/templates/GPL2.0.txt +340 -0
- data/lib/license_auto/license/templates/GPL3.0.txt +220 -0
- data/lib/license_auto/license/templates/H2.txt +118 -0
- data/lib/license_auto/license/templates/IBMPublic1.0.txt +58 -0
- data/lib/license_auto/license/templates/LGPL2.0.txt +481 -0
- data/lib/license_auto/license/templates/LGPL2.1.txt +166 -0
- data/lib/license_auto/license/templates/LGPL3.0.txt +56 -0
- data/lib/license_auto/license/templates/Lucent Public License 1.02.txt +244 -0
- data/lib/license_auto/license/templates/MIT.txt +21 -0
- data/lib/license_auto/license/templates/MIT2.0.txt +11 -0
- data/lib/license_auto/license/templates/MPL 2.0.txt +373 -0
- data/lib/license_auto/license/templates/MPL1.1.txt +470 -0
- data/lib/license_auto/license/templates/MS-LPL.txt +22 -0
- data/lib/license_auto/license/templates/MS-PL.txt +20 -0
- data/lib/license_auto/license/templates/MS-RL.txt +19 -0
- data/lib/license_auto/license/templates/NPL1.0.txt +138 -0
- data/lib/license_auto/license/templates/OpenSSL.txt +127 -0
- data/lib/license_auto/license/templates/Oracle license.txt +69 -0
- data/lib/license_auto/license/templates/PublicDomain.txt +7 -0
- data/lib/license_auto/license/templates/Python.txt +15 -0
- data/lib/license_auto/license/templates/QPL1.0.txt +45 -0
- data/lib/license_auto/license/templates/RubyClause-6.txt +56 -0
- data/lib/license_auto/license/templates/SQLite Copyright.txt +33 -0
- data/lib/license_auto/license/templates/Sleepycat.txt +133 -0
- data/lib/license_auto/license/templates/SunPublic1.0.txt +183 -0
- data/lib/license_auto/license/templates/WTFPL license.txt +13 -0
- data/lib/license_auto/license_info.rb +12 -0
- data/lib/license_auto/matcher.rb +100 -0
- data/lib/license_auto/package.rb +73 -0
- data/lib/license_auto/parser/gemfile.rb +0 -0
- data/lib/license_auto/parser/golang.rb +0 -0
- data/lib/license_auto/parser/gradle.rb +0 -0
- data/lib/license_auto/version.rb +20 -0
- data/lib/license_auto/website.rb +11 -0
- data/lib/license_auto/website/github.rb +49 -0
- data/lib/license_auto/website/ruby_gems_org.rb +79 -0
- data/lib/license_auto/website/ruby_gems_org_db.rb +52 -0
- data/lib/license_auto/website/rubydoc_info.rb +12 -0
- data/lib/message.rb +5 -0
- data/lib/misc.rb +131 -0
- data/lib/parser/enums.rb +161 -0
- data/lib/parser/gemfile_parser.rb +221 -0
- data/lib/parser/golang_parser.rb +104 -0
- data/lib/parser/gradle_parser.rb +141 -0
- data/lib/parser/manifest_parser.rb +66 -0
- data/lib/parser/maven_parser.rb +91 -0
- data/lib/parser/npm_parser.rb +82 -0
- data/lib/parser/pip_parser.rb +1 -0
- data/lib/parser/rebar_parser.rb +61 -0
- data/lib/recorder.rb +184 -0
- data/lib/script/apt-get.deps.sh +37 -0
- data/lib/script/debian_dpkg_list.sh +2 -0
- data/lib/script/rubygems.org.importdb.sh +22 -0
- data/license_auto.gemspec +44 -0
- data/remove/license_auto +4 -0
- data/remove/mq_pack.rb +219 -0
- data/remove/mq_repo.rb +106 -0
- metadata +249 -0
data/lib/api/github.rb
ADDED
@@ -0,0 +1,376 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "base64"
|
3
|
+
require 'httparty'
|
4
|
+
require 'json'
|
5
|
+
require_relative '../misc'
|
6
|
+
require_relative '../api/pattern'
|
7
|
+
require_relative './helper'
|
8
|
+
|
9
|
+
module API
|
10
|
+
|
11
|
+
class Github
|
12
|
+
attr_reader :ref, :owner, :repo, :host, :repo_url
|
13
|
+
|
14
|
+
# Builds a client for one GitHub repository.
#
# repo_url - repository URL; `git://` and `git@host:owner/repo` forms are
#            rewritten to `https://` before being stored in @repo_url.
# db_ref   - optional version string or 40-character commit sha, resolved
#            to a git ref by _match_a_ref.
#
# NOTE(review): if the URL does not match API::SOURCE_URL_PATTERN[:github]
# the match is nil and reading the groups raises NoMethodError.
def initialize(repo_url, db_ref=nil)
  normalized =
    case repo_url
    when /^git:\/\//
      repo_url.gsub(/^git:\/\//, 'https://')
    when /^git@/
      # git@github.com:owner/repo -> https://github.com/owner/repo
      repo_url.gsub(/:/, '/').gsub(/^git@/, 'https://')
    else
      repo_url
    end
  @repo_url = normalized

  # FIXME: error: http://github.com/TooTallNate/ansi.js -> http://github.com/TooTallNate/ansi
  parts = API::SOURCE_URL_PATTERN[:github].match(normalized.gsub(/\.git$/, ''))
  @protocol = parts[:protocol]
  @host = parts[:host]
  @owner = parts[:owner]
  @repo = parts[:repo]

  @http_option = Github.get_http_option
  @ref = _match_a_ref(db_ref)
end
|
36
|
+
|
37
|
+
# Builds the option hash passed to every HTTParty request.
#
# Basic-auth credentials are read from the `github_username` and
# `github_password` environment variables; proxy address and port are added
# when Misc.get_http_proxy returns a value.
def self.get_http_option
  # TODO: config file
  options = {
    :basic_auth => {
      :username => ENV['github_username'],
      :password => ENV['github_password']
    }
  }
  proxy = Misc.get_http_proxy
  if proxy
    options[:http_proxyaddr] = proxy[:addr]
    options[:http_proxyport] = proxy[:port]
  end
  options
end
|
50
|
+
|
51
|
+
# Resolves a stored version string to a git ref of this repository.
#
# db_ref - nil, a 40-character commit sha (Golang versions are git shas),
#          or a version number such as "1.2.3".
#
# Returns db_ref unchanged when it is nil or 40 characters long. Otherwise
# returns the name of the first tag that ends with the version (optionally
# prefixed by "v"), or the default branch when no tag matches.
def _match_a_ref(db_ref)
  return db_ref if db_ref.nil? || db_ref.size == 40

  # The version is escaped so that characters such as "+", "*" or "-" are
  # matched literally. The left boundary stops "1.0" from matching "v11.0"
  # or "2.1.0", while still allowing prefixed tags such as "release-1.0".
  version_pattern = /(?:\A|[^0-9.])v?#{Regexp.escape(db_ref)}\z/i
  list_all_tags.each do |tag|
    ref_name = tag['ref'].split('/').last
    if ref_name =~ version_pattern
      $plog.debug(ref_name)
      return ref_name
    end
  end
  get_default_branch
end
|
72
|
+
|
73
|
+
# DOC: https://developer.github.com/v3/git/refs/
|
74
|
+
# Fetches the tag references of the repository from the GitHub git-refs API.
#
# Returns the parsed JSON response on HTTP 200; on any other status the
# response is logged and an empty array is returned.
def list_all_tags
  api_url = "https://api.github.com/repos/#{@owner}/#{@repo}/git/refs/tags"
  $plog.info("api_url: #{api_url}")
  response = HTTParty.get(api_url, @http_option)
  case response.code
  when 200
    # The payload lives in response.body.
    return JSON.parse(response.body)
  when 403
    $plog.error("!!! Github 403 Forbidden: #{response}")
  when 404
    $plog.error("!!! Github 404 Not found: #{response}")
  else
    $plog.error("!!! list_all_references() response: #{response}")
  end
  []
end
|
90
|
+
|
91
|
+
# DOC: https://developer.github.com/v3/repos/commits/#list-commits-on-a-repository
|
92
|
+
# Fetches the commit list of the repository from the GitHub commits API.
#
# Returns the parsed JSON response on HTTP 200; on any other status the
# response is logged and nil is returned.
def list_commits()
  api_url = "https://api.github.com/repos/#{@owner}/#{@repo}/commits"
  $plog.info("api_url: #{api_url}")
  response = HTTParty.get(api_url, @http_option)
  case response.code
  when 200
    # The payload lives in response.body.
    return JSON.parse(response.body)
  when 403
    $plog.error("!!! Github 403 Forbidden: #{response}")
  when 404
    $plog.error("!!! Github 404 Not found: #{response}")
  else
    $plog.error("!!! list_commits() response: #{response}")
  end
  nil
end
|
108
|
+
|
109
|
+
# Returns the first entry of list_commits, or nil when the list is missing
# or empty.
def last_commits()
  commits = list_commits
  return nil unless commits and commits.length > 0
  commits[0]
end
|
117
|
+
|
118
|
+
# DOC: https://developer.github.com/v3/repos/#get
|
119
|
+
# Fetches the repository description from the GitHub repos API.
#
# Returns the parsed JSON response on HTTP 200. On any other status the
# failure is logged and nil is returned, so callers can test the result
# for truthiness.
def get_repo_info()
  api_url = "https://api.github.com/repos/#{@owner}/#{@repo}"
  response = HTTParty.get(api_url, @http_option)
  if response.code == 200
    # The payload lives in response.body.
    return JSON.parse(response.body)
  elsif response.code == 403
    $plog.error('!!! Github 403 Forbidden.')
  else
    $plog.error("!!! response.code: #{response.code}, response.body: #{response.body}")
  end
  # Explicit nil: the logger's return value must not leak out as repo info.
  nil
end
|
131
|
+
|
132
|
+
# Points @ref at the repository's default branch.
#
# Returns [default_branch, switched], where switched is true when the
# default branch differs from the previous @ref.
def switch_to_default_branch
  branch = get_default_branch
  changed = branch != @ref
  @ref = branch
  [branch, changed]
end
|
138
|
+
|
139
|
+
# Returns the `default_branch` field of get_repo_info, or nil when no
# repository info is available.
def get_default_branch
  info = get_repo_info
  info ? info['default_branch'] : nil
end
|
147
|
+
|
148
|
+
# Resolves a GitHub HTML file page to its raw URL by requesting
# "<html_page>?raw=true".
#
# Returns [raw_url, content] on HTTP 200, where raw_url is the last URI of
# the request and content is the response body; returns [nil, nil] for any
# other status.
def self.convert_htmlpage_to_raw_url(html_page)
  api_url = "#{html_page}?raw=true"
  response = HTTParty.get(api_url, get_http_option)
  return [nil, nil] unless response.code == 200

  raw_url = response.request.last_uri.to_s
  $plog.debug(raw_url)
  # The payload lives in response.body.
  [raw_url, response.body]
end
|
164
|
+
|
165
|
+
# DOC: https://developer.github.com/v3/repos/contents/#get-contents
|
166
|
+
# Lists the entries under +path+ via the GitHub contents API, at @ref when
# one is set.
#
# Returns the parsed JSON response on HTTP 200; on any other status the
# failure is logged and an empty array is returned.
def list_contents(path='')
  api_url = "https://api.github.com/repos/#{@owner}/#{@repo}/contents/#{path}"
  api_url += "?ref=#{@ref}" if @ref

  $plog.info("list_contents: api_url: #{api_url}")
  response = HTTParty.get(api_url, @http_option)
  if response.code == 200
    # The payload lives in response.body.
    return JSON.parse(response.body)
  elsif response.code == 403
    $plog.error('!!! Github 403 Forbidden.')
  else
    $plog.error("!!! response.code: #{response.code}, response.body: #{response.body}")
  end
  []
end
|
184
|
+
|
185
|
+
# Collects the license and README files found directly under +path+.
#
# Returns {:license => [...], :readme => [...]} holding the matching entries
# from list_contents, as classified by API::Helper. Only entries whose
# 'type' is 'file' are considered.
def filter_license_contents(path)
  list_contents(path).each_with_object({:license => [], :readme => []}) do |entry, found|
    next unless entry['type'] == 'file'
    found[:license].push(entry) if API::Helper.is_license_file(entry['name'])
    found[:readme].push(entry) if API::Helper.is_readme_file(entry['name'])
  end
end
|
200
|
+
|
201
|
+
# Returns the entries under +path+ whose 'type' is 'file' and whose name
# API::Helper classifies as a NOTICE file.
def filter_notice_contents(path = '')
  list_contents(path).select do |entry|
    entry['type'] == 'file' && API::Helper.is_notice_file(entry['name'])
  end
end
|
213
|
+
|
214
|
+
# Finds the git submodule definition file under +path+.
#
# path - directory inside the repository to search (defaults to the root).
#
# Returns the contents entry named ".gitmodules" (the legacy name
# "gitmodules" is still accepted), or nil when there is none.
def get_gitmodules(path = '')
  list_contents(path).find do |entry|
    entry['type'] == 'file' && ['.gitmodules', 'gitmodules'].include?(entry['name'])
  end
end
|
224
|
+
|
225
|
+
# DOC: https://developer.github.com/v3/licenses/#get-the-contents-of-a-repositorys-license
|
226
|
+
# Queries the GitHub license API for the repository, at @ref when set.
#
# Returns a hash with :license (the reported license name), :license_url
# (the download URL of the license file) and :license_text (the decoded
# file content). All three are nil when the request does not return 200.
# license_text is 'DECODING ERROR!' when the content is not base64 encoded.
def api_get_a_repositorys_license
  license = license_url = license_text = nil
  api_url = "https://api.github.com/repos/#{@owner}/#{@repo}/license"
  api_url += "?ref=#{@ref}" if @ref

  $plog.info("api_get_a_repositorys_license: api_url: #{api_url}")
  response = HTTParty.get(api_url, @http_option)
  if response.code == 200
    # The payload lives in response.body.
    contents = JSON.parse(response.body)
    # Guard against a missing/null "license" object in the payload.
    license = (contents['license'] || {})['name']
    license_url = contents['download_url']
    license_text =
      if contents['encoding'] == 'base64'
        Base64.decode64(contents['content'])
      else
        'DECODING ERROR!'
      end
  elsif response.code == 403
    $plog.error('!!! Github 403 Forbidden.')
  else
    $plog.error("!!! response.code: #{response.code}, response.body: #{response.body}")
  end

  {
    license: license,
    license_url: license_url,
    license_text: license_text
  }
end
|
256
|
+
|
257
|
+
# Downloads +url+ with the instance's HTTP options.
# Returns the response body on HTTP 200; otherwise logs the status and
# returns nil.
def _download_text(url)
  response = HTTParty.get(url, @http_option)
  return response.body if response.code == 200

  # TODO: Use Dragon's HTML crawler, if API call limited
  $plog.error("!!! response.code: #{response.code}, download_url: #{url}")
  nil
end

# Chooses heading patterns for a README based on its file extension.
# Returns [license_heading, any_heading], or nil for unsupported extensions.
def _readme_heading_patterns(file_name)
  case File.extname(file_name)
  when /\.(rdoc|txt|text)$/i
    [/^==\s*(copying|license){1}:*/i, /^==/]
  when /\.(md|markdown)/i
    [/^#+\s*(copying|license){1}:*/i, /^#+/]
  end
end

# Extracts the text between the first license heading and the next heading
# (or the end of the document). Returns nil when there is no license heading.
def _extract_license_section(readme_text, license_heading, any_heading)
  # Replace invalid byte sequences so regex matching cannot raise.
  text = readme_text.scrub
  offset = 0
  section_start = nil
  text.each_line("\n") do |line|
    if section_start.nil?
      section_start = offset + line.size if line =~ license_heading
    elsif line =~ any_heading
      return text[section_start, offset - section_start]
    end
    # Offsets are tracked while iterating rather than re-searching the text
    # for each line, so headings containing regex metacharacters or repeated
    # lines cannot break them.
    offset += line.size
  end
  section_start && text[section_start..-1]
end

# Determines the repository's license from files in its root directory.
#
# Lookup order:
#   1. Each license file is downloaded and passed to License_recognition
#      until one is recognized.
#   2. If nothing was recognized, the "License"/"Copying" section of each
#      supported README (.rdoc/.txt/.text/.md/.markdown) is tried; the first
#      README that has such a section is used.
#   3. For 'Apache2.0' the license text is replaced by the joined contents
#      of the NOTICE files (nil when none could be downloaded).
#
# Returns a hash with :license, :license_url and :license_text; any of them
# may be nil.
def get_license_info()
  license = license_url = license_text = nil
  license_contents = filter_license_contents('')

  license_contents[:license].each do |c|
    download_url = c['download_url']
    $plog.info("License file 链接: #{download_url}")
    text = _download_text(download_url)
    next if text.nil?

    license_text = text
    license_url = download_url
    $plog.info("license_text: #{license_text}")

    # TODO: @Dragon, upgrade it to multi licenses
    license = License_recognition.new.similarity(license_text, STD_LICENSE_DIR)
    break if license
  end

  if license.nil?
    license_contents[:readme].each do |c|
      $plog.debug(" README file name: #{c['name']}")
      patterns = _readme_heading_patterns(c['name'])
      next if patterns.nil?

      download_url = c['download_url']
      $plog.info("readme file 链接: #{download_url}")
      readme_text = _download_text(download_url)
      next if readme_text.nil?

      $plog.info("readme_text: #{readme_text}")
      readme_license = _extract_license_section(readme_text, *patterns)
      next if readme_license.nil?

      license = License_recognition.new.similarity(readme_license, STD_LICENSE_DIR)
      license_text = readme_license
      license_url = download_url
      break
    end
  end

  if license == 'Apache2.0'
    license_text = nil
    items = []
    filter_notice_contents.each do |c|
      download_url = c['download_url']
      $plog.info("License file 链接: #{download_url}")
      notice = _download_text(download_url)
      items.push(notice) unless notice.nil?
    end
    license_text = items.join("\n" + '-' * 80 + "\n") if items.size > 0
  end

  {
    license: license,
    license_url: license_url,
    license_text: license_text
  }
end
|
366
|
+
end
|
367
|
+
|
368
|
+
end
|
369
|
+
|
370
|
+
# Manual smoke test, run only when this file is executed directly: prints
# the license text found for the aws-sdk-ruby repository.
if __FILE__ == $0
  github = API::Github.new('https://github.com/aws/aws-sdk-ruby')
  license_info = github.get_license_info
  p license_info[:license_text]
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require_relative '../api/github'
|
2
|
+
|
3
|
+
module API
  # Maps a gopkg.in import URL onto the GitHub repository behind it.
  class GoPkgIn
    attr_reader :repo_url, :protocol, :host, :owner, :repo, :ref

    # DOC: http://labix.org/gopkg.in#SupportedURLs
    # repo_url: https://gopkg.in/validator.v2 -> https://github.com/go-validator/validator/tree/v2
    #
    # When the URL carries no owner, the owner defaults to "go-<repo>".
    # NOTE(review): db_ref is accepted but not used here; the ref is taken
    # from the URL itself.
    def initialize(repo_url, db_ref=nil)
      @repo_url_perfix = 'https://github.com'

      parts = API::SOURCE_URL_PATTERN[:go_pkg_in].match(repo_url)
      @protocol = parts[:protocol]
      @host = parts[:host]
      @repo = parts[:repo]
      @owner = parts[:owner] || "go-#{@repo}"

      # TODO: follow DOC: (branch/tag v3, v3.N, or v3.N.M)
      @ref = parts[:ref]
      @repo_url = "#{@repo_url_perfix}/#{@owner}/#{@repo}"
    end

    # Returns the latest commit of the backing GitHub repository.
    def last_commits
      API::Github.new(@repo_url, @ref).last_commits
    end
  end
end
|
30
|
+
|
31
|
+
# Manual smoke test, run only when this file is executed directly: prints
# the latest commit and the parsed URL parts for a sample gopkg.in import.
if __FILE__ == $0
  pkg = API::GoPkgIn.new('https://gopkg.in/validator.v2')
  p pkg.last_commits
  p pkg.protocol
  p pkg.host
  p pkg.owner
  p pkg.repo
  p pkg.ref
  p pkg.repo_url
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'anemone'
|
2
|
+
|
3
|
+
require_relative '../api/github'
|
4
|
+
require_relative '../api/pattern'
|
5
|
+
require_relative '../../lib/misc'
|
6
|
+
|
7
|
+
module API
  # Resolves a golang.org import URL to its source repository by reading
  # the project link on the package's godoc.org page.
  class GolangOrg
    attr_reader :repo_url, :protocol, :host, :owner, :repo, :ref

    # DOC:
    # repo_url: https://golang.org/x/crypto
    #
    # NOTE(review): db_ref is accepted but not used; @ref is always nil.
    def initialize(repo_url, db_ref=nil)
      @golang_import_url = repo_url
      @golang_doc_url = "https://godoc.org/#{repo_url.gsub(/http[s]?:\/\//, '')}"

      parts = API::SOURCE_URL_PATTERN[:golang_org].match(repo_url)
      @protocol = parts[:protocol]
      @host = parts[:host]
      @repo = parts[:repo]
      @owner = parts[:owner] || "go-#{@repo}"
      @ref = nil

      # Anemone takes proxy settings as :proxy_host / :proxy_port.
      @http_option = {}
      proxy = Misc.get_http_proxy
      if proxy
        @http_option[:proxy_host] = proxy[:addr]
        @http_option[:proxy_port] = proxy[:port]
      end
      @repo_url = get_repo_url
    end

    # Crawls the godoc page (depth 0) and stores the href of the first link
    # inside the `x-projnav` element as @repo_url.
    #
    # Raises RuntimeError when the page has no such link.
    def get_repo_url
      crawl_options = {:discard_page_bodies => true, :depth_limit => 0}.merge(@http_option)
      Anemone.crawl(@golang_doc_url, crawl_options) do |crawler|
        crawler.on_every_page do |page|
          links = page.doc.xpath("//div[@id='x-projnav']/a[1]")
          raise "last_commit error: #{self}, #{@repo_url}" if links.size == 0
          @repo_url = links.attr('href').value
        end
      end
      $plog.debug("@golang_import_url: #{@golang_import_url}, @repo_url: #{@repo_url}")
      @repo_url
    end
  end
end
|
56
|
+
|
57
|
+
# Manual smoke test, run only when this file is executed directly.
# Another import URL to try: 'https://google.golang.org/cloud/compute'
if __FILE__ == $0
  g = API::GolangOrg.new('https://golang.org/x/crypto')
end
|
62
|
+
|
63
|
+
|