license_auto 0.1.1.2 → 0.1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +9 -2
  5. data/README.md +10 -8
  6. data/lib/license_auto/exceptions.rb +10 -0
  7. data/lib/license_auto/license/frequency.rb +1 -1
  8. data/lib/license_auto/license/readme.rb +21 -2
  9. data/lib/license_auto/license/similarity.rb +4 -3
  10. data/lib/license_auto/license_info_wrapper.rb +16 -0
  11. data/lib/license_auto/matcher.rb +14 -4
  12. data/lib/license_auto/package.rb +29 -15
  13. data/lib/license_auto/package_manager.rb +6 -2
  14. data/lib/license_auto/package_manager/bundler.rb +10 -6
  15. data/lib/license_auto/package_manager/gemfury.rb +2 -0
  16. data/lib/license_auto/package_manager/git_module.rb +73 -0
  17. data/lib/license_auto/package_manager/golang.rb +155 -0
  18. data/lib/license_auto/package_manager/gradle.rb +145 -0
  19. data/lib/license_auto/package_manager/maven.rb +133 -0
  20. data/lib/license_auto/package_manager/npm.rb +117 -1
  21. data/lib/license_auto/package_manager/pip.rb +8 -2
  22. data/lib/license_auto/repo.rb +51 -3
  23. data/lib/license_auto/source_code_server.rb +13 -0
  24. data/lib/license_auto/var/golang_std_libs.rb +160 -0
  25. data/lib/license_auto/version.rb +1 -1
  26. data/lib/license_auto/website.rb +1 -0
  27. data/lib/license_auto/website/cocoapods_org.rb +10 -0
  28. data/lib/license_auto/website/github_com.rb +43 -18
  29. data/lib/license_auto/website/homepage_spider.rb +56 -0
  30. data/lib/license_auto/website/maven_central_repository.rb +258 -0
  31. data/lib/license_auto/website/npm_registry.rb +178 -0
  32. data/lib/license_auto/website/ruby_gems_org.rb +119 -8
  33. data/license_auto.gemspec +1 -0
  34. data/package.json +30 -0
  35. metadata +25 -40
  36. data/lib/license_auto/package_manager/golang_stdlibs.rb +0 -161
  37. data/remove/api.rb +0 -60
  38. data/remove/api/bitbucket.rb +0 -142
  39. data/remove/api/code_google_com.rb +0 -66
  40. data/remove/api/excel_export.rb +0 -189
  41. data/remove/api/gem_data.rb +0 -30
  42. data/remove/api/git_kernel_org.rb +0 -59
  43. data/remove/api/github.rb +0 -376
  44. data/remove/api/go_pkg_in.rb +0 -41
  45. data/remove/api/golang_org.rb +0 -63
  46. data/remove/api/google_source_com.rb +0 -68
  47. data/remove/api/gradle2.rb +0 -41
  48. data/remove/api/helper.rb +0 -26
  49. data/remove/api/j_center.rb +0 -23
  50. data/remove/api/maven_central_repository.rb +0 -192
  51. data/remove/api/mq.rb +0 -30
  52. data/remove/api/npm_registry.rb +0 -169
  53. data/remove/api/pattern.rb +0 -33
  54. data/remove/api/remote_source_package.rb +0 -319
  55. data/remove/api/spider.rb +0 -47
  56. data/remove/cloner.rb +0 -154
  57. data/remove/db.rb +0 -267
  58. data/remove/license_auto +0 -4
  59. data/remove/misc.rb +0 -131
  60. data/remove/mq_pack.rb +0 -219
  61. data/remove/mq_repo.rb +0 -106
  62. data/remove/parser/gemfile_parser.rb +0 -221
  63. data/remove/parser/golang_parser.rb +0 -104
  64. data/remove/parser/gradle_parser.rb +0 -141
  65. data/remove/parser/manifest_parser.rb +0 -66
  66. data/remove/parser/maven_parser.rb +0 -91
  67. data/remove/parser/npm_parser.rb +0 -82
  68. data/remove/parser/pip_parser.rb +0 -1
  69. data/remove/parser/rebar_parser.rb +0 -61
  70. data/remove/recorder.rb +0 -184
  71. data/remove/script/apt-get.deps.sh +0 -37
  72. data/remove/script/debian_dpkg_list.sh +0 -2
  73. data/remove/script/rubygems.org.importdb.sh +0 -22
data/remove/api/spider.rb DELETED
@@ -1,47 +0,0 @@
1
- require "httparty"
2
- require_relative '../../lib/misc'
3
-
4
module API

  # Downloads a package's homepage and scans the body for a link to the
  # package's source repository on github.com or bitbucket.org.
  class Spider
    # homepage_url - URL of the page to download.
    # pack_name    - repository name expected right after the author segment of the link.
    #
    # When Misc.get_http_proxy returns a value, its :addr and :port entries are
    # passed on to HTTParty as proxy options.
    def initialize(homepage_url, pack_name)
      @homepage_url = homepage_url
      @pack_name = pack_name
      @http_option = {}
      http_proxy = Misc.get_http_proxy
      if http_proxy
        @http_option[:http_proxyaddr] = http_proxy[:addr]
        @http_option[:http_proxyport] = http_proxy[:port]
      end
    end

    # Returns "https://<host>/<author>/<pack_name>" built from the first matching
    # repository link in the homepage body, or nil when the response code is not
    # 200 or no link matches.
    def find_source_url()
      response = HTTParty.get(@homepage_url, @http_option)
      # TODO: 404
      return nil unless response.code == 200

      # The page text lives in the response body.
      match_result = source_pattern.match(response.body.to_s)
      return nil unless match_result

      # Keep the host that was actually matched instead of always answering github.com.
      host = (match_result[:web_host] || match_result[:ssh_host]).downcase
      "https://#{host}/#{match_result[:author]}/#{@pack_name}"
    end

    private

    # Pattern for "http(s)://<host>/<author>/<pack_name>" and "git@<host>:<author>/<pack_name>".
    # The author is a single path segment, and the package name is escaped so that
    # characters such as "." are matched literally.
    def source_pattern
      hosts = 'github\.com|bitbucket\.org'
      %r{(?:https?://(?<web_host>#{hosts})/|git@(?<ssh_host>#{hosts}):)(?<author>[^/\s]+)/#{Regexp.escape(@pack_name)}}i
    end

  end

end ### API
40
-
41
# Manual check, executed only when this file is run directly: prints the
# source URL the spider finds for 'httparty' on the given homepage.
if $PROGRAM_NAME == __FILE__
  spider = API::Spider.new("http://www.rubyonrails.org", 'httparty')
  p spider.find_source_url
end
data/remove/cloner.rb DELETED
@@ -1,154 +0,0 @@
1
- require 'git'
2
- require 'fileutils'
3
- require 'json'
4
- require_relative '../config/config'
5
- require_relative '../lib/api/pattern'
6
- require_relative '../lib/api/github'
7
-
8
# Clones repositories below AUTO_ROOT and registers the submodules listed in
# their .gitmodules file, either as child repos or as packages.
module Cloner

  # Returns the local checkout path for +repo+: AUTO_ROOT followed by the URL
  # with every "https://", "http://" and "git@" removed.
  def self.make_path(repo)
    # todo: If repo is a git@github.com:xxx/yyy
    $plog.debug(repo)
    repo = repo.gsub(/(https:\/\/|http:\/\/|git@)/, '')
    "#{AUTO_ROOT}/#{repo}"
  end

  # Clones +repo+ (or pulls it when the checkout already exists and $debug is
  # not set), processes its submodules, and returns the checkout path.
  #
  # repo       - clone URL.
  # release_id - forwarded to process_gitmodules.
  # repo_id    - id of this repo, forwarded to process_gitmodules as the parent repo id.
  # reclone    - when true the existing checkout is removed first.
  #
  # Git errors are not rescued here.
  def self.clone_repo(repo, release_id, repo_id, reclone=false)
    path = self.make_path(repo)
    FileUtils::rm_rf(path) if reclone
    $plog.debug("Cloning #{repo} into #{path}...")

    # Dir.exists? was removed in Ruby 3.2; Dir.exist? works on old and new rubies.
    if Dir.exist?(path)
      unless $debug
        g = Git.open(path, :log => $plog)
        local_branch = g.branches.local[0].full
        g.pull('origin', local_branch)
      end
    else
      opts = {
          # :recursive => true
          # Only last commit history
          :depth => 1
      }
      local_repo = Git.clone(repo, path, opts)
      path = local_repo.dir.path
    end
    $plog.debug("Cloned #{repo} into #{path}.")

    process_gitmodules(path, release_id, repo_id)

    path
  end

  # todo: Checkout a branch or tag
  def self.checkout_branch(branch)
    true
  end

  # Registers every submodule URL found under +clone_path+.
  #
  # A submodule whose "<host>/<owner>" is present in whitelist_orgs is added as
  # a repo (and published to the repo queue when newly added); any other
  # submodule is added as a pack with status 10 (and published to the pack
  # queue when newly added).
  def self.process_gitmodules(clone_path, release_id, parent_repo_id)
    gitmodules = find_gitmodules(clone_path)
    $plog.info("gitmodules: #{gitmodules}")
    gitmodules.each {|url|
      url = normalize_submodule_url(url)
      $plog.debug("gitmodules url: #{url}")

      remote = API::RemoteSourceVCS.new(url)
      homepage = remote.get_homepage
      g = remote.vcs

      sub_host, sub_repo_owner, sub_repo_name = g.host, g.owner, g.repo
      org_url = "#{sub_host}/#{sub_repo_owner}"

      if api_get_whitelist_orgs(org_url).ntuples > 0
        $plog.debug("whitelist_orgs: #{url}")

        new_added, sub_repo = add_repo(sub_repo_name, url, parent_repo_id)
        sub_repo_id = sub_repo['id'].to_i
        $plog.debug("sub_repo_id: #{sub_repo_id}, new_added: #{new_added}")
        case_items = api_query_product_repo(release_id, parent_repo_id)
        if case_items.ntuples > 0
          api_add_product_repo(release_id, parent_repo_id, sub_repo_id)
        end

        mq_publish_repo(release_id, sub_repo_id) if new_added
      else
        $plog.debug("whitelist_orgs not: #{url}")
        pack_name = sub_repo_name
        last_commit = g.last_commits
        pack_version = last_commit ? last_commit['sha'] : nil
        lang = g.host
        source_url = g.repo_url
        license = cmt = nil
        status = 10
        add_pack_result = api_add_pack(pack_name, pack_version, lang, homepage, source_url, license, status, cmt)
        pack_id, is_newbie = add_pack_result['pack_id'].to_i, (add_pack_result['is_newbie'] == 't')
        api_add_product_repo_pack(parent_repo_id, pack_id, release_id)
        if is_newbie
          queue_name = 'license_auto.pack'
          $rmq.publish(queue_name, {:pack_id => pack_id}.to_json, true)
        end
        $plog.debug("pack_id: #{pack_id}, is_newbie: #{is_newbie}")
      end
    }
  end

  # Publishes a {release_id, repo_id} JSON message to the 'license_auto.repo' queue.
  def self.mq_publish_repo(release_id, sub_repo_id)
    message = {
        :release_id => release_id,
        :repo_id => sub_repo_id
    }
    queue_name = 'license_auto.repo'
    $plog.info("submodule is Repo, enqueue MQ.repo, release_id: #{release_id}, sub_repo_id: #{sub_repo_id}")
    $rmq.publish(queue_name, message.to_json)
  end

  # Returns the submodule URLs declared in "<clone_path>/.gitmodules", each with
  # a trailing ".git" removed; returns [] when the file does not exist.
  def self.find_gitmodules(clone_path)
    gitmodules = []
    filename = '.gitmodules'
    file = "#{clone_path}/#{filename}"
    # File.exists? was removed in Ruby 3.2.
    submodules_exists = File.exist?(file)
    $plog.debug("submodules_exists: #{submodules_exists}")
    if submodules_exists
      # Anchored on the key so only "url" entries match; whitespace around "="
      # is optional and \S+ keeps trailing blanks or a CR out of the captured URL.
      pattern = /^\s*url\s*=\s*(?<url>\S+)/
      File.readlines(file).each {|line|
        matched = pattern.match(line)
        next unless matched
        $plog.debug("matched: #{matched}, submodule line: #{line}")
        gitmodules.push(matched[:url].sub(/\.git\z/, ''))
      }
    end
    gitmodules
  end

  # Rewrites "user@host:owner/name" into "https://host/owner/name" and
  # "git://..." into "http://..."; any other URL is returned unchanged.
  def self.normalize_submodule_url(url)
    # git@github.com:repo_owner/repo_name
    ssh_pattern = /^(?<username>.+)@/
    git_pattern = /^git:\/\//
    if url =~ ssh_pattern
      url.gsub(/:/, '/').gsub(ssh_pattern, 'https://')
    elsif url =~ git_pattern
      url.gsub(git_pattern, 'http://')
    else
      url
    end
  end
  private_class_method :normalize_submodule_url

end
154
-
data/remove/db.rb DELETED
@@ -1,267 +0,0 @@
1
- require 'httparty'
2
- require_relative '../config/config'
3
- require_relative '../lib/message'
4
-
5
# Looks up the product named +product_name+ and inserts it when the lookup
# does not return exactly one row.
#
# Returns [false, row] for an existing product, [true, row] for a freshly
# inserted one, and nil when the insert did not return exactly one row.
def add_product(product_name)
  existing = $conn.exec_params("select * from product where name = $1", [product_name])
  return false, existing[0] if existing.ntuples == 1

  inserted = $conn.exec_params("insert into product (name) select $1 returning *", [product_name])
  return true, inserted[0] if inserted.ntuples == 1
end
16
-
17
# Looks up the repo with the given name and source URL and inserts it (with
# +priv+ and +parent_repo_id+) when the lookup does not return exactly one row.
#
# Returns [false, row] for an existing repo, [true, row] for a freshly
# inserted one, and nil when the insert did not return exactly one row.
def add_repo(repo_name, source_url, parent_repo_id=nil, priv=-1)
  existing = $conn.exec_params("select * from repo where name = $1 and source_url = $2", [repo_name, source_url])
  return false, existing[0] if existing.ntuples == 1

  inserted = $conn.exec_params("insert into repo (name, source_url, priv, parent_repo_id) select $1, $2, $3, $4 returning *",
                               [repo_name, source_url, priv, parent_repo_id])
  return true, inserted[0] if inserted.ntuples == 1
end
29
-
30
# Returns the source_url of the repo with id +repo_id+, or nil unless exactly
# one row is found.
def api_get_repo_source_url(repo_id)
  rows = $conn.exec_params("select source_url from repo where id = $1", [repo_id])
  rows.ntuples == 1 ? rows[0]['source_url'] : nil
end
39
-
40
# Deletes the product_repo_pack rows that belong to the product_repo entries
# of (+release_id+, +repo_id+) and whose pack has status < 41, then logs a
# warning about the removal. Returns whatever $plog.warn returns.
def api_clear_relations(release_id, repo_id)
  $conn.exec_params("
      delete from product_repo_pack
      where id in (
        select r.id from product_repo_pack r
          join pack on pack.id = r.pack_id
        where product_repo_id in (
            select id from product_repo where
              release_id = $1
              and repo_id = $2
          )
          and pack.status < 41
      )", [release_id, repo_id])
  $plog.warn("You are rerunning the repo's deps, the history relation table data of this repo was deleted")
end
55
-
56
# Returns the product_repo row (product_id, release_id, repo_id) with id
# +case_id+, or nil unless exactly one row is found.
def api_get_case_by_id(case_id)
  rows = $conn.exec_params("select product_id, release_id, repo_id from product_repo where id = $1", [case_id])
  rows.ntuples == 1 ? rows[0] : nil
end
64
-
65
# Returns the query result holding the product_repo rows for the given
# release and repo.
def api_query_product_repo(release_id, repo_id)
  $conn.exec_params("
      select * from product_repo
      where release_id = $1
        and repo_id = $2", [release_id, repo_id])
end
71
-
72
# Returns the query result holding the `ymls` column of the repo with id +repo_id+.
def api_get_repo_manifest_file_list(repo_id)
  $conn.exec_params("select ymls from repo where id = $1", [repo_id])
end
75
-
76
# Links +sub_repo_id+ to every product that already has a product_repo row
# for (+release_id+, +parent_repo_id+), skipping products that are already
# linked to the sub repo for that release.
#
# A failure while handling one product is logged with $plog.fatal and does not
# stop the remaining products. Only StandardError is rescued, so Interrupt and
# SystemExit still propagate.
#
# Returns the result of iterating the parent's product_repo rows.
def api_add_product_repo(release_id, parent_repo_id, sub_repo_id)
  all_products = $conn.exec_params("
      select * from product_repo
      where release_id = $1
        and repo_id = $2", [release_id, parent_repo_id])
  all_products.each {|p|
    product_id = p['product_id']
    begin
      pg_result = $conn.exec_params("select * from product_repo where release_id = $1 and product_id = $2 and repo_id = $3",
                                    [release_id, product_id, sub_repo_id])
      if pg_result.ntuples == 0
        $plog.debug("release_id: #{release_id}, repo_id: #{sub_repo_id}, parent_repo_id: #{parent_repo_id}")
        $conn.exec_params("insert into product_repo (release_id, product_id, repo_id) values ($1, $2, $3)",
                          [release_id, product_id, sub_repo_id])
      end
    rescue StandardError => e
      $plog.fatal(e)
    end
  }
end
97
-
98
# Calls the SQL function add_product_repo_pack(repo_id, pack_id, release_id)
# and returns the first row of its result.
def api_add_product_repo_pack(repo_id, pack_id, release_id)
  rows = $conn.exec_params("select add_product_repo_pack($1, $2, $3)", [repo_id, pack_id, release_id])
  rows[0]
end
104
-
105
# Calls the SQL function add_pack with the given pack attributes.
#
# Returns the single result row (columns pack_id and is_newbie), or nil when
# the call did not return exactly one row.
def api_add_pack(pack_name, pack_version, lang, homepage, source_url, license, status, cmt, project_url=nil)
  # "select * from select add_pack('goose', 'unknown', 'Golang', null, null, null, null, null) as t(pack_id integer, new bool)"
  rows = $conn.exec_params("select * from add_pack($1, $2, $3, $4, $5, $6, $7, $8, $9) as t(pack_id integer, is_newbie bool)",
                           [pack_name, pack_version, lang, homepage, source_url, license, status, cmt, project_url])
  rows.ntuples == 1 ? rows[0] : nil
end
116
-
117
# Asks the license website for the completion ratio of a (release, repo) pair.
#
# Returns the ratio parsed from the response body as a Float, or 0.0 when the
# response code is not 200 (the response is then logged as an error).
# When the ratio is at least 0.95 a notification is sent through Message.send.
def api_get_complete_ratio(release_id, repo_id)
  # TODO: port
  ratio = 0.0
  api_path = '/api/v1/repo/complete_ratio'
  api_url = "#{LICENSE_WEBSITE_URL}#{api_path}"
  response = HTTParty.get(api_url,
                          :query => {
                              :release_id => release_id,
                              :repo_id => repo_id
                          })
  if response.code == 200
    # The ratio is the text of the response body.
    ratio = response.body.to_f
    $plog.debug("complete_ratio: #{ratio}")
    if ratio >= 0.95
      # Names are not looked up here, so identify the release and repo by the
      # ids this method was called with instead of leaving the fields blank.
      content = "Your release: #{release_id}, repo: #{repo_id} has completed, check it please."
      Message.send(content)
    end
  else
    $plog.error("#{response}")
  end
  ratio
end
141
-
142
# Returns the pack row with id +pack_id+ (id, name, version, source_url, lang,
# homepage, license, license_text, status), or nil unless exactly one row is found.
def api_get_pack_by_id(pack_id)
  rows = $conn.exec_params("select id, name, version, source_url, lang, homepage, license, license_text, status from pack
      where id = $1", [pack_id])
  rows.ntuples == 1 ? rows[0] : nil
end
151
-
152
# Returns the query result of std_license rows filtered by the raw SQL clause +where+.
#
# NOTE(review): +where+ is interpolated into the statement verbatim, so it
# must never be built from untrusted input.
def api_get_std_license_name(where='where 1 = 1')
  $conn.exec("select * from std_license #{where}")
end
155
-
156
# Sets status, cmt and update_at on the pack with id +pack_id+.
#
# Only the first 80 characters of +cmt+ are stored; a nil +cmt+ is stored as
# NULL instead of raising NoMethodError. Returns the result of the update.
def api_setup_pack_status(pack_id, status, cmt)
  $plog.debug(cmt)
  stored_cmt = cmt.nil? ? nil : cmt.to_s[0..79]
  $conn.exec_params("update pack set status = $1, cmt = $2, update_at = now() where id = $3", [status, stored_cmt, pack_id])
end
160
-
161
# Sets status, cmt and update_at on the product_repo row whose id is +repo_id+.
# NOTE(review): despite its name, +repo_id+ is matched against product_repo.id — confirm with callers.
#
# Only the first 80 characters of +cmt+ are stored; a nil +cmt+ is stored as
# NULL instead of raising NoMethodError. Returns the result of the update.
def api_setup_case_status(repo_id, status, cmt)
  $plog.debug(cmt)
  stored_cmt = cmt.nil? ? nil : cmt.to_s[0..79]
  $conn.exec_params("update product_repo set status = $1, cmt = $2, update_at = now() where id = $3", [status, stored_cmt, repo_id])
end
165
-
166
-
167
# Calls the SQL function update_pack for the pack with id +pack_id+.
#
# pack - Hash read with the symbol keys :version, :homepage, :source_url,
#        :license_url, :license, :unclear_license, :license_text, :status
#        and :project_url.
#
# Returns false when update_pack reports -1, true otherwise. The first result
# row is a column => value mapping, so the value is taken out of the row and
# converted to an Integer before the comparison; comparing the row itself
# with -1 could never be true.
def api_update_pack_info(pack_id, pack)
  # r = $conn.exec_params("select update_pack($1,$2,$3,$4,$5,$6,$7,$8,$9)",[pack_id,pack['version'],pack['homepage'],pack['source_url'],pack['license_url'],pack['license'],pack['unclear_license'],pack['license_text'],pack['status']])

  r = $conn.exec_params("select update_pack($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)",
                        [pack_id, pack[:version], pack[:homepage], pack[:source_url], pack[:license_url],
                         pack[:license], pack[:unclear_license], pack[:license_text], pack[:status], pack[:project_url]])
  row = r[0]
  code = row.respond_to?(:values) ? row.values.first : row
  code.to_i != -1
end
180
-
181
# Returns the query result holding the pack rows that match +name+, +version+ and +lang+.
def api_get_packs_by_name(name, version, lang)
  $conn.exec_params("select * from pack where name = $1 and version = $2 and lang = $3",[name, version, lang])
end
185
-
186
# Queries $gemconn for the latest 'ruby'-platform version of the gem +name+
# and returns the query result (name, number, home, code, licenses).
def api_get_gemdata_by_name(name)
  platform = 'ruby'
  $gemconn.exec_params("select rubygems.name, versions.number, linksets.home, linksets.code, versions.licenses
      from rubygems, versions, linksets
      where rubygems.id = versions.rubygem_id
        and rubygems.id = linksets.rubygem_id
        and rubygems.name = $1
        and versions.latest = true
        and versions.platform = $2", [name, platform])
end
195
-
196
# Queries $gemconn for the gem +name+ at version number +version+ and returns
# the query result (name, number, home, code, licenses).
def api_get_gemdata_by_name_and_version(name, version)
  $gemconn.exec_params("select rubygems.name, versions.number, linksets.home, linksets.code, versions.licenses
      from rubygems, versions, linksets
      where rubygems.id = versions.rubygem_id
        and rubygems.id = linksets.rubygem_id
        and rubygems.name = $1
        and versions.number = $2", [name, version])
end
204
-
205
# Returns the pack rows attached to the product +name+ for the release
# identified by +release_name+ and +release_version+, or nil when the query
# returns no rows.
def api_get_template_result_by_product(name, release_name, release_version)
  rows = $conn.exec_params("select product.name, repo.name, pack.name, pack.version, pack.unclear_license, pack.license, pack.license_text, pack.source_url
      from product_repo_pack
        join pack on product_repo_pack.pack_id = pack.id
        join product_repo on product_repo_pack.product_repo_id = product_repo.id
        join repo on product_repo.repo_id = repo.id
        join product on product_repo.product_id = product.id
        join release_tbl on product_repo.release_id = release_tbl.id
      where product.name = $1
        and release_tbl.name = $2
        and release_tbl.version = $3", [name, release_name, release_version])
  rows.ntuples > 0 ? rows : nil
end
222
-
223
# Returns the (product_repo.id, repo.name) rows of the product +name+ for the
# release identified by +release_name+ and +release_version+, or nil when the
# query returns no rows.
def api_get_repo_list_by_product(name, release_name, release_version)
  rows = $conn.exec_params("select product_repo.id, repo.name
      from product_repo
        join product on product_repo.product_id = product.id
        join repo on product_repo.repo_id = repo.id
        join release_tbl on product_repo.release_id = release_tbl.id
      where product.name = $1
        and release_tbl.name = $2
        and release_tbl.version = $3", [name, release_name, release_version])
  rows.ntuples > 0 ? rows : nil
end
238
-
239
# Returns the pack rows attached to the product_repo entry +id+, or nil when
# the query returns no rows.
def api_get_template_result_by_product_repo_id(id)
  rows = $conn.exec_params("select pack.name, pack.version, pack.unclear_license, pack.license, pack.license_text, pack.source_url
      from product_repo_pack
        join pack on product_repo_pack.pack_id = pack.id
      where product_repo_pack.product_repo_id = $1", [id])
  rows.ntuples > 0 ? rows : nil
end
250
-
251
# Returns the query result holding the source_url of the pack with id +pack_id+.
def api_get_manifest_download_url(pack_id)
  $conn.exec_params("select source_url from pack where id = $1", [pack_id])
end
254
-
255
- # org_url: github.com/your_org_or_person_name
256
# Returns the query result holding the whitelist_orgs rows whose url equals +org_url+.
def api_get_whitelist_orgs(org_url)
  $conn.exec_params("select * from whitelist_orgs where url = $1", [org_url])
end
259
-
260
# Returns the query result holding the repo rows whose source_url equals +url+.
def api_get_repo_by_url(url)
  $conn.exec_params("select * from repo where source_url = $1", [url])
end
263
-
264
# Manual check, executed only when this file is run directly.
if $PROGRAM_NAME == __FILE__
  # p api_get_repo_manifest_file_list(80).values[0]
  api_get_complete_ratio(1, 2)
end