license_auto 0.1.1.2 → 0.1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +9 -2
  5. data/README.md +10 -8
  6. data/lib/license_auto/exceptions.rb +10 -0
  7. data/lib/license_auto/license/frequency.rb +1 -1
  8. data/lib/license_auto/license/readme.rb +21 -2
  9. data/lib/license_auto/license/similarity.rb +4 -3
  10. data/lib/license_auto/license_info_wrapper.rb +16 -0
  11. data/lib/license_auto/matcher.rb +14 -4
  12. data/lib/license_auto/package.rb +29 -15
  13. data/lib/license_auto/package_manager.rb +6 -2
  14. data/lib/license_auto/package_manager/bundler.rb +10 -6
  15. data/lib/license_auto/package_manager/gemfury.rb +2 -0
  16. data/lib/license_auto/package_manager/git_module.rb +73 -0
  17. data/lib/license_auto/package_manager/golang.rb +155 -0
  18. data/lib/license_auto/package_manager/gradle.rb +145 -0
  19. data/lib/license_auto/package_manager/maven.rb +133 -0
  20. data/lib/license_auto/package_manager/npm.rb +117 -1
  21. data/lib/license_auto/package_manager/pip.rb +8 -2
  22. data/lib/license_auto/repo.rb +51 -3
  23. data/lib/license_auto/source_code_server.rb +13 -0
  24. data/lib/license_auto/var/golang_std_libs.rb +160 -0
  25. data/lib/license_auto/version.rb +1 -1
  26. data/lib/license_auto/website.rb +1 -0
  27. data/lib/license_auto/website/cocoapods_org.rb +10 -0
  28. data/lib/license_auto/website/github_com.rb +43 -18
  29. data/lib/license_auto/website/homepage_spider.rb +56 -0
  30. data/lib/license_auto/website/maven_central_repository.rb +258 -0
  31. data/lib/license_auto/website/npm_registry.rb +178 -0
  32. data/lib/license_auto/website/ruby_gems_org.rb +119 -8
  33. data/license_auto.gemspec +1 -0
  34. data/package.json +30 -0
  35. metadata +25 -40
  36. data/lib/license_auto/package_manager/golang_stdlibs.rb +0 -161
  37. data/remove/api.rb +0 -60
  38. data/remove/api/bitbucket.rb +0 -142
  39. data/remove/api/code_google_com.rb +0 -66
  40. data/remove/api/excel_export.rb +0 -189
  41. data/remove/api/gem_data.rb +0 -30
  42. data/remove/api/git_kernel_org.rb +0 -59
  43. data/remove/api/github.rb +0 -376
  44. data/remove/api/go_pkg_in.rb +0 -41
  45. data/remove/api/golang_org.rb +0 -63
  46. data/remove/api/google_source_com.rb +0 -68
  47. data/remove/api/gradle2.rb +0 -41
  48. data/remove/api/helper.rb +0 -26
  49. data/remove/api/j_center.rb +0 -23
  50. data/remove/api/maven_central_repository.rb +0 -192
  51. data/remove/api/mq.rb +0 -30
  52. data/remove/api/npm_registry.rb +0 -169
  53. data/remove/api/pattern.rb +0 -33
  54. data/remove/api/remote_source_package.rb +0 -319
  55. data/remove/api/spider.rb +0 -47
  56. data/remove/cloner.rb +0 -154
  57. data/remove/db.rb +0 -267
  58. data/remove/license_auto +0 -4
  59. data/remove/misc.rb +0 -131
  60. data/remove/mq_pack.rb +0 -219
  61. data/remove/mq_repo.rb +0 -106
  62. data/remove/parser/gemfile_parser.rb +0 -221
  63. data/remove/parser/golang_parser.rb +0 -104
  64. data/remove/parser/gradle_parser.rb +0 -141
  65. data/remove/parser/manifest_parser.rb +0 -66
  66. data/remove/parser/maven_parser.rb +0 -91
  67. data/remove/parser/npm_parser.rb +0 -82
  68. data/remove/parser/pip_parser.rb +0 -1
  69. data/remove/parser/rebar_parser.rb +0 -61
  70. data/remove/recorder.rb +0 -184
  71. data/remove/script/apt-get.deps.sh +0 -37
  72. data/remove/script/debian_dpkg_list.sh +0 -2
  73. data/remove/script/rubygems.org.importdb.sh +0 -22
@@ -0,0 +1,56 @@
1
+ require 'open-uri'
2
+
3
+ require 'license_auto/license/similarity'
4
+
5
# Inspects a package's homepage to discover where its source code lives and
# what its license page says.
#
# Most of this class is still a stub: only #get_license_page performs real
# work, and it relies on `open-uri`, `LicenseAuto::Similarity` and
# `LicenseAuto::LicenseWrapper` being loaded by the surrounding file/project.
class HomepageSpider

  # Hosts that are recognised as source-code servers.
  SOURCE_CODE_HOSTS = %w[github.com bitbucket.org].freeze

  # License page fetched by #get_license_page when the caller passes no URL.
  DEFAULT_LICENSE_PAGE = 'https://www.sqlite.org/copyright.html'

  # @param homepage [String] homepage URL of the package
  # @param package_name [String] name of the package, e.g. 'sequel'
  def initialize(homepage, package_name)
    @homepage = homepage
    @package_name = package_name
  end

  # Builds the pattern that recognises a source-code URL for this package,
  # i.e. "<known host>/<anything>/<package name>".
  #
  # This has to be built per instance: the package name is only known after
  # #initialize ran, so it cannot be interpolated into a class-level constant
  # (doing so dereferences nil while the class body is being evaluated).
  # The name is regexp-escaped so that names such as "a.b" match literally.
  #
  # @return [Regexp]
  def source_code_uri_pattern
    hosts = SOURCE_CODE_HOSTS.map { |host| Regexp.escape(host) }.join('|')
    %r{(#{hosts})/.*/#{Regexp.escape(@package_name.to_s)}}
  end

  # Intended to find the package's repository URL on its homepage.
  #
  # Eg.
  #   Name:    sequel
  #   Version: 4.32.0
  #   Lang:    rubygems.org
  #   http://sequel.jeremyevans.net/development.html
  #   -> https://github.com/jeremyevans/sequel/
  #
  # TODO: not implemented yet; always returns nil.
  # @return [nil]
  def get_source_code_uri
    nil
  end

  # Downloads a license page and guesses which license it contains.
  #
  # Eg.
  #   homepage = 'https://www.sqlite.org/'
  #   pack_name = 'sqlite3'
  #   spider = HomepageSpider.new(homepage, pack_name)
  #   license_wrapper = spider.get_license_page
  #   # => { html_url: 'https://www.sqlite.org/copyright.html', text: 'xxx', ... }
  #
  # @param html_url [String] page to download; defaults to the SQLite
  #   copyright page, which is the only page the original code ever fetched
  # @return [LicenseAuto::LicenseWrapper] wrapper holding the page URL, its
  #   raw text, and the name/ratio pair returned by
  #   LicenseAuto::Similarity#most_license_sim
  def get_license_page(html_url = DEFAULT_LICENSE_PAGE)
    # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the open-uri
    # entry point. The block form closes the stream once it has been read.
    text = URI.open(html_url, &:read)
    license_name, sim_ratio = LicenseAuto::Similarity.new(text).most_license_sim

    LicenseAuto::LicenseWrapper.new(
      html_url: html_url,
      text: text,
      name: license_name,
      sim_ratio: sim_ratio
    )
  end

  # TODO: not implemented yet; always returns nil.
  # @return [nil]
  def get_license_info
    # source_code_uri = get_source_code_uri
    nil
  end

end
@@ -0,0 +1,258 @@
1
+ # [Maven Repository Centre](https://maven.apache.org/repository/index.html)
2
+ # [API Guide](http://search.maven.org/#api)
3
+ require 'json'
4
+ require 'hashie'
5
+ require 'nokogiri'
6
+ require 'httparty'
7
+
8
+ require 'license_auto/license/similarity'
9
+
10
module LicenseAuto
  # Looks up one Maven artifact (group id / artifact id / version) through
  # the Maven Central search API, downloads its POM and extracts package and
  # license information from it.
  #
  # Relies on HTTParty, Hashie, Nokogiri, JSON and LicenseAuto.logger being
  # loaded by the surrounding file/project.
  class MavenCentralRepository

    # Base URL of the search API.
    # RESTful sample:
    #   GET http://search.maven.org/solrsearch/select?q=g%3A%22com.google.inject%22&rows=20&wt=json
    REST_API = 'http://search.maven.org/solrsearch'

    # @param group_id [String] e.g. 'com.google.inject'
    # @param artifact_id [String] e.g. 'guice'
    # @param version [String, nil] e.g. '3.0'; nil or '' means "any version"
    # @param central_prefix [String] base URL of the artifact repository
    def initialize(group_id, artifact_id, version, central_prefix='https://repo1.maven.org/maven2')
      @group_id = group_id
      @artifact_id = artifact_id
      @version = version
      @classifier = ''
      @central_prefix = central_prefix
    end

    # Searches the "gav" core for this artifact (and version, when one is set).
    #
    # Example:
    #   GET http://search.maven.org/solrsearch/select?q=g:"com.google.inject"+AND+a:"guice"&core=gav&rows=20&wt=json
    # Sample response body:
    #   {
    #     "responseHeader": {"status": 0, "QTime": 0, "params": {
    #       "fl": "id,g,a,v,p,ec,timestamp,tags",
    #       "sort": "score desc,timestamp desc,g asc,a asc,v desc",
    #       "indent": "off",
    #       "q": "g:\"net.sourceforge.nekohtml\" AND a:\"nekohtml\" AND v:\"1.9.20\"",
    #       "core": "gav", "wt": "json", "rows": "20", "version": "2.2"}},
    #     "response": {"numFound": 1, "start": 0, "docs": [{
    #       "id": "net.sourceforge.nekohtml:nekohtml:1.9.20",
    #       "g": "net.sourceforge.nekohtml", "a": "nekohtml", "v": "1.9.20",
    #       "p": "jar", "timestamp": 1392301277000,
    #       "tags": ["html", "parser", "balancer"],
    #       "ec": ["-sources.jar", "-javadoc.jar", ".jar", ".pom"]}]}
    #   }
    #
    # @return [Hashie::Mash, nil] the parsed response, or nil (after logging)
    #   when the API does not answer with HTTP 200
    def select
      url = select_url
      response = HTTParty.get(url)
      if response.code == 200
        Hashie::Mash.new(JSON.parse(response.body))
      else
        LicenseAuto.logger.debug(url)
        LicenseAuto.logger.error("CentralRepository select error:\n#{response}")
        nil
      end
    end

    # Searches for the jar of this exact group/artifact/version/classifier.
    #
    # Eg: http://search.maven.org/solrsearch/select?q=g:%22com.google.inject%22%20AND%20a:%22guice%22%20AND%20v:%223.0%22%20AND%20l:%22javadoc%22%20AND%20p:%22jar%22&rows=20&wt=json
    #
    # @return [Hash] the parsed JSON response
    # @raise [RuntimeError] when the API does not answer with HTTP 200
    def advance_search
      url = advance_search_url
      LicenseAuto.logger.debug("api_url: #{url}")
      response = HTTParty.get(url)
      raise "CentralRepository select error: #{response}" unless response.code == 200

      JSON.parse(response.body)
    end

    # Downloads the POM of the given artifact.
    #
    # The request no longer forces `ssl_version: 'TLSv1'`: Maven Central only
    # accepts TLS 1.2 or newer, so pinning TLS 1.0 makes the handshake fail.
    #
    # @return [Array(String, String)] [pom_url, pom_body] on HTTP 200
    # @return [Array(String, nil)] [pom_url, nil] on any other status; the
    #   failure branch must yield nil explicitly, otherwise the logger's own
    #   return value would be handed to callers as if it were the POM text
    def get_package_pom(group, name, version)
      pom_url = make_pom_url(group, name, version)
      response = HTTParty.get(pom_url)
      if response.code == 200
        LicenseAuto.logger.debug("pom_url: #{pom_url}")
        [pom_url, response.body]
      else
        LicenseAuto.logger.error("pom_url: #{pom_url}, #{response}")
        [pom_url, nil]
      end
    end

    # Example: https://repo1.maven.org/maven2/com/google/inject/guice/3.0/guice-3.0.pom
    # @return [String] URL of the artifact's POM below the central prefix
    def make_pom_url(group, name, version)
      central_body = [group.tr('.', '/'), name, version].join('/')
      central_tail = "#{name}-#{version}.pom"
      [@central_prefix, central_body, central_tail].join('/')
    end

    # @return [String] maven-repository.com page of this artifact; the version
    #   segment is only appended when a version is set
    def make_project_url
      base = "https://maven-repository.com/artifact/#{@group_id}/#{@artifact_id}"
      has_version ? "#{base}/#{@version}" : base
    end

    # @return [Boolean] true unless the version is nil or the empty string
    def has_version
      !@version.nil? && @version != ''
    end

    # Searches for this artifact and, when the search lists a POM for exactly
    # this version, parses that POM.
    #
    # @return [LicenseAuto::LicenseInfoWrapper] with :pack and :licenses set
    #   when a POM was found and downloaded; otherwise left as created
    def get_license_info
      license_info = LicenseAuto::LicenseInfoWrapper.new
      query_set = select

      if query_set.nil?
        LicenseAuto.logger.error("Maven search result is empty")
        return license_info
      end

      # Only the first matching document matters; the POM URL is derived from
      # the instance's coordinates, not from the document itself.
      pom_listed = query_set.response.docs.any? { |doc|
        doc.v == @version && doc.ec.include?('.pom')
      }
      return license_info unless pom_listed

      pom_url, pom_str = get_package_pom(@group_id, @artifact_id, @version)
      if pom_str
        pack_wrapper, license_files = parser_pom(pom_url, pom_str)
        license_info[:pack] = pack_wrapper
        license_info[:licenses] = license_files
      end
      license_info
    end

    # Extracts package and license data from a POM document.
    #
    # @param pom_url [String] where the POM was downloaded from
    # @param pom_str [String] the POM XML
    # @return [Array(LicenseAuto::PackWrapper, Array<LicenseAuto::LicenseWrapper>)]
    def parser_pom(pom_url, pom_str)
      LicenseAuto.logger.debug("pom_str:\n#{pom_str[0..70]}")
      doc = Nokogiri::XML(pom_str).remove_namespaces!

      # xpath always returns a node set, so a missing element yields '' here
      # (never nil) for both the homepage and the SCM (source code) URL.
      pack_wrapper = LicenseAuto::PackWrapper.new(
        project_url: make_project_url,
        homepage: doc.xpath("/project/url").text,
        source_url: doc.xpath("/project/scm/url").text
      )

      licenses_node = doc.xpath("//licenses/license")
      LicenseAuto.logger.debug("licenses: \n#{licenses_node.to_xml}")

      # Multi licenses: https://maven-repository.com/artifact/org.cryptacular/cryptacular/1.0
      license_files = licenses_node.map { |node| license_from_node(node, pom_url) }

      # Comment license text info: eg. https://repo1.maven.org/maven2/commons-io/commons-io/2.4/commons-io-2.4.pom
      license_files = header_comment_licenses(doc, pom_url) if license_files.empty?

      [pack_wrapper, license_files]
    end

    private

    # URL used by #select; the version term is only added when one is set.
    def select_url
      terms = { g: @group_id, a: @artifact_id }
      terms[:v] = @version if has_version
      build_search_url(terms, core: 'gav')
    end

    # URL used by #advance_search.
    def advance_search_url
      build_search_url(g: @group_id, a: @artifact_id, v: @version, l: @classifier, p: 'jar')
    end

    # Builds a search URL whose q parameter is `field:"value"` terms joined
    # with AND. URI.encode_www_form does the escaping (URI.escape no longer
    # exists in Ruby 3.0+); spaces are sent as '+'.
    def build_search_url(terms, extra = {})
      query = terms.map { |field, value| "#{field}:\"#{value}\"" }.join(' AND ')
      params = { q: query }.merge(extra).merge(rows: 20, wt: 'json')
      "#{REST_API}/select?#{URI.encode_www_form(params)}"
    end

    # Turns one <license> element of a POM into a LicenseWrapper.
    def license_from_node(node, pom_url)
      license_name = node.xpath(".//name").text.gsub(/\s/, '')
      license_url = node.xpath(".//url").text.strip
      text_node = node.xpath(".//text")

      # TODO: find a license_text demo
      license_text =
        if !text_node.empty?
          LicenseAuto.logger.debug(text_node.text)
          text_node.text
        elsif !license_url.empty?
          LicenseAuto.logger.debug(license_url)
          fetch_license_text(license_url)
        end

      _license_name, sim_ratio =
        if license_text
          LicenseAuto::Similarity.new(license_text).most_license_sim
        else
          [nil, 1.0]
        end

      # <comments> is only a fallback for the stored text; it is deliberately
      # not fed into the similarity check above.
      comments_node = node.xpath(".//comments")
      if license_text.nil? && !comments_node.empty?
        LicenseAuto.logger.debug(comments_node.text)
        license_text = comments_node.text
      end

      LicenseAuto::LicenseWrapper.new(
        name: license_name,
        sim_ratio: sim_ratio,
        html_url: pom_url,
        download_url: license_url,
        text: license_text
      )
    end

    # Downloads the text behind a license URL. Unreachable or malformed URLs
    # are logged and treated as "no text" so that one dead link does not abort
    # parsing of the whole POM.
    # TODO: add proxy
    def fetch_license_text(license_url)
      response = HTTParty.get(license_url, timeout: 10)
      response.body if response.code == 200
    rescue StandardError => e
      LicenseAuto.logger.error("license_url: #{license_url}, #{e.message}")
      nil
    end

    # Fallback for POMs without a <licenses> section: uses a top-level XML
    # comment mentioning 'license' or, failing that, 'author' as license text.
    # @return [Array<LicenseAuto::LicenseWrapper>] zero or one wrapper
    def header_comment_licenses(doc, pom_url)
      %w[license author].each do |keyword|
        comment_nodes = doc.xpath("/comment()[contains(., '#{keyword}')]")
        next if comment_nodes.empty?

        return [
          LicenseAuto::LicenseWrapper.new(
            name: "UNKNOWN",
            sim_ratio: 1.0,
            html_url: pom_url,
            download_url: pom_url,
            text: comment_nodes.to_xml
          )
        ]
      end
      []
    end
  end
end
@@ -0,0 +1,178 @@
1
+ require 'httparty'
2
+ require 'hashie/mash'
3
+ require 'license_auto/package_manager'
4
+
5
module LicenseAuto

  # Resolves license information for an npm package from the registry's
  # metadata, delegating to GithubCom when the package's repository is hosted
  # on GitHub.
  #
  # Relies on HTTParty, Hashie, JSON, Open3 and LicenseAuto.logger being
  # loaded by the surrounding file/project.
  # NOTE(review): assumes Website#initialize stores the package in @package
  # and that the package exposes #name, #version and #version= — confirm.
  class NpmRegistry < Website

    attr_reader :registry

    # Script handed to `node -e`. It reads the candidate version and the
    # range from the last two command-line arguments instead of having them
    # spliced into the source, so neither value can alter the script.
    SEMVER_SATISFIES_SCRIPT =
      "var semver = require('semver'); var args = process.argv.slice(-2); " \
      "console.log(semver.satisfies(args[0], args[1]));"

    # @param package the package to look up
    # @param registry [String] registry base URL, with trailing slash
    def initialize(package, registry='http://registry.npmjs.org/')
      super(package)
      @registry = registry
      @pack_meta = nil
    end

    # Fetches the registry document of the package.
    #
    # RESTful API: http://registry.npmjs.org/:pack_name
    # TEST: http://registry.npmjs.org/grunt
    #
    # A successful answer is kept in @pack_meta so that resolving a version
    # and then reading the license does not download the document twice.
    #
    # @return [Hashie::Mash, nil] nil (after logging) unless HTTP 200
    def get_package_meta
      return @pack_meta if @pack_meta

      api_url = "#{@registry}#{@package.name}"
      LicenseAuto.logger.debug(api_url)
      response = HTTParty.get(api_url)

      if response.code == 200
        @pack_meta = Hashie::Mash.new(JSON.parse(response.body))
      else
        LicenseAuto.logger.error("Npm registry API response: #{response}")
        nil
      end
    end

    # Fetches the registry document for the package's current version.
    #
    # RESTful API: http://registry.npmjs.org/grunt/?version=0.1.0
    #
    # @return [Hash, nil] the parsed response body, or nil (after logging)
    #   unless HTTP 200
    def get_package_info_by_version
      api_url = "#{@registry}#{@package.name}/?version=#{@package.version}"
      LicenseAuto.logger.debug(api_url)
      response = HTTParty.get(api_url)
      if response.code == 200
        # The JSON document is the response body; an HTTParty response has no
        # `licenses` accessor to parse.
        JSON.parse(response.body)
      else
        LicenseAuto.logger.error(response)
        nil
      end
    end

    # Filters the package's published versions down to those satisfying a
    # semver range, using the `semver` module of a local Node.js install.
    #
    # DOC: https://www.npmjs.com/package/semver
    # DOC: https://github.com/npm/node-semver
    #
    # @param sem_version_range [String] e.g. '~1.2.3'
    # @return [Hash] version => metadata entries, in registry order
    # @raise [LicenseAuto::PackageNotFound] when the registry has no metadata
    def get_available_versions(sem_version_range)
      package_meta = get_package_meta
      raise LicenseAuto::PackageNotFound if package_meta.nil?

      package_meta.versions.select { |version, _meta|
        # Equivalent of:
        #   node -e "var semver = require('semver'); console.log(semver.satisfies('1.2.3', '1.x || >=2.5.0'));"
        # The argument-list form of capture3 starts node without a shell, so
        # quotes or shell metacharacters in either value are passed verbatim.
        stdout_str, _stderr_str, _status = Open3.capture3(
          'node', '-e', SEMVER_SATISFIES_SCRIPT, '--', version.to_s, sem_version_range.to_s
        )
        stdout_str.strip == 'true'
      }
    end

    # Picks the last available version in registry order.
    # NOTE(review): that is presumably the most recently published one, which
    # is not necessarily the highest semver — confirm.
    #
    # @return [String, nil] nil when no version satisfies the range
    def chose_latest_available_version(sem_version_range)
      available_versions = get_available_versions(sem_version_range)
      chosen = available_versions.keys.last
      LicenseAuto.logger.debug("chosen version: #{chosen} for #{@package.name}")
      chosen
    end

    # Collects license information for the package.
    #
    # When the package has no version yet, the latest available one is chosen
    # first; any failure while doing so is logged and nil is returned.
    #
    # @return [LicenseAuto::LicenseInfoWrapper, nil]
    # @raise [LicenseAuto::PackageNotFound] when the registry has no metadata
    # @raise [LicenseAuto::SourceURLNotFound] when the metadata names no
    #   repository
    def get_license_info
      if @package.version.nil?
        begin
          @package.version = chose_latest_available_version('*')
        rescue StandardError, LicenseAuto::PackageNotFound => e
          # Deliberately not `rescue Exception`: interrupts and exits must
          # still propagate.
          LicenseAuto.logger.error(e)
          return nil
        end
      end

      npm_info = get_package_meta
      raise LicenseAuto::PackageNotFound if npm_info.nil?

      source_url = resolve_source_url(npm_info)
      raise LicenseAuto::SourceURLNotFound unless source_url

      license_info = LicenseAuto::LicenseInfoWrapper.new

      source_code_matcher = LicenseAuto::Matcher::SourceURL.new(source_url)
      github_matched = source_code_matcher.match_github_resource
      bitbucket_matched = source_code_matcher.match_bitbucket_resource

      if github_matched
        license_info = GithubCom.new(@package, github_matched[:owner], github_matched[:repo]).get_license_info
      elsif bitbucket_matched
        # TODO bitbucket_matched
      else
        # TODO: HomepageSpider could be tried here before falling back to the
        # license fields declared in the registry metadata.
        license_files = registry_license_files(npm_info)
        unless license_files.empty?
          license_info[:licenses] = license_files
          LicenseAuto.logger.debug(license_info)
        end
      end

      license_info[:pack] = LicenseAuto::PackWrapper.new(
        # npm metadata carries no project_uri field (that is a RubyGems
        # name), so fall back to the package's npmjs.com page.
        project_url: npm_info.project_uri || "https://www.npmjs.com/package/#{@package.name}",
        homepage: npm_info.homepage,
        source_url: uniform_repository_url(source_url)
      )
      license_info
    end

    # Rewrites git-specific URL schemes into plain http(s) ones:
    # 'git://host/x' becomes 'http://host/x', and 'git+http...' /
    # 'git+https...' lose their 'git+' prefix. Anything else, including nil,
    # is returned unchanged.
    def uniform_repository_url(repo_url)
      case repo_url
      when %r{\Agit://}
        repo_url.sub(%r{\Agit://}, 'http://')
      when /\Agit\+http/
        repo_url.sub(/\Agit\+http/, 'http')
      else
        repo_url
      end
    end

    private

    # Reads the repository URL from the metadata. `repository` may be an
    # object with a `url` member or a bare URL string; when it is present
    # but carries no URL, the homepage is used instead.
    # @return [String, nil] nil when the package declares no repository
    def resolve_source_url(npm_info)
      repository = npm_info.repository
      return nil unless repository

      url = repository.is_a?(String) ? repository : repository.url
      url = npm_info.homepage if url.to_s.empty?
      url.to_s.empty? ? nil : url
    end

    # Builds license wrappers from the license fields of the metadata.
    # The legacy `licenses` list wins over the `license` field.
    #
    # DOC: https://docs.npmjs.com/files/package.json#license
    # Examples of `license`:
    #   { "license": "ISC" }
    #   { "license": "(MIT OR Apache-2.0)" }
    #   { "license": "UNLICENSED" }   # no license
    #
    # @return [Array<LicenseAuto::LicenseWrapper>] empty when nothing is
    #   declared (either field may be missing altogether)
    def registry_license_files(npm_info)
      declared = npm_info.licenses
      declared = [declared] unless declared.is_a?(Array)
      declared = declared.compact
      unless declared.empty?
        LicenseAuto.logger.debug(declared)
        return declared.map { |entry| legacy_license_wrapper(entry) }
      end

      license = npm_info.license
      case license
      when String
        spdx_license_names(license).map { |license_name|
          LicenseAuto::LicenseWrapper.new(
            name: license_name,
            sim_ratio: 1.0,
            html_url: npm_info.homepage,
            download_url: npm_info.homepage,
            text: nil
          )
        }
      when Hash
        [legacy_license_wrapper(license)]
      else
        []
      end
    end

    # Wraps one legacy license entry, which is either an object with `type`
    # and `url` members or a bare license name.
    def legacy_license_wrapper(entry)
      name, url = entry.is_a?(String) ? [entry, nil] : [entry['type'], entry['url']]
      LicenseAuto::LicenseWrapper.new(
        name: name,
        sim_ratio: 1.0,
        html_url: nil,
        download_url: url,
        text: nil
      )
    end

    # Splits an SPDX license expression into the names it mentions, e.g.
    # "(LGPL-2.1 OR MIT)" => ["LGPL-2.1", "MIT"]. Parentheses at any nesting
    # depth and the AND/OR operators are dropped.
    # TODO: [SPDX license expression syntax version 2.0 string](https://www.npmjs.com/package/spdx)
    def spdx_license_names(expression)
      expression.tr('()', '  ').split.reject { |token| %w[AND OR].include?(token) }
    end
  end
end