license_auto 0.1.1.2 → 0.1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (73) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/.gitmodules +3 -0
  4. data/.travis.yml +9 -2
  5. data/README.md +10 -8
  6. data/lib/license_auto/exceptions.rb +10 -0
  7. data/lib/license_auto/license/frequency.rb +1 -1
  8. data/lib/license_auto/license/readme.rb +21 -2
  9. data/lib/license_auto/license/similarity.rb +4 -3
  10. data/lib/license_auto/license_info_wrapper.rb +16 -0
  11. data/lib/license_auto/matcher.rb +14 -4
  12. data/lib/license_auto/package.rb +29 -15
  13. data/lib/license_auto/package_manager.rb +6 -2
  14. data/lib/license_auto/package_manager/bundler.rb +10 -6
  15. data/lib/license_auto/package_manager/gemfury.rb +2 -0
  16. data/lib/license_auto/package_manager/git_module.rb +73 -0
  17. data/lib/license_auto/package_manager/golang.rb +155 -0
  18. data/lib/license_auto/package_manager/gradle.rb +145 -0
  19. data/lib/license_auto/package_manager/maven.rb +133 -0
  20. data/lib/license_auto/package_manager/npm.rb +117 -1
  21. data/lib/license_auto/package_manager/pip.rb +8 -2
  22. data/lib/license_auto/repo.rb +51 -3
  23. data/lib/license_auto/source_code_server.rb +13 -0
  24. data/lib/license_auto/var/golang_std_libs.rb +160 -0
  25. data/lib/license_auto/version.rb +1 -1
  26. data/lib/license_auto/website.rb +1 -0
  27. data/lib/license_auto/website/cocoapods_org.rb +10 -0
  28. data/lib/license_auto/website/github_com.rb +43 -18
  29. data/lib/license_auto/website/homepage_spider.rb +56 -0
  30. data/lib/license_auto/website/maven_central_repository.rb +258 -0
  31. data/lib/license_auto/website/npm_registry.rb +178 -0
  32. data/lib/license_auto/website/ruby_gems_org.rb +119 -8
  33. data/license_auto.gemspec +1 -0
  34. data/package.json +30 -0
  35. metadata +25 -40
  36. data/lib/license_auto/package_manager/golang_stdlibs.rb +0 -161
  37. data/remove/api.rb +0 -60
  38. data/remove/api/bitbucket.rb +0 -142
  39. data/remove/api/code_google_com.rb +0 -66
  40. data/remove/api/excel_export.rb +0 -189
  41. data/remove/api/gem_data.rb +0 -30
  42. data/remove/api/git_kernel_org.rb +0 -59
  43. data/remove/api/github.rb +0 -376
  44. data/remove/api/go_pkg_in.rb +0 -41
  45. data/remove/api/golang_org.rb +0 -63
  46. data/remove/api/google_source_com.rb +0 -68
  47. data/remove/api/gradle2.rb +0 -41
  48. data/remove/api/helper.rb +0 -26
  49. data/remove/api/j_center.rb +0 -23
  50. data/remove/api/maven_central_repository.rb +0 -192
  51. data/remove/api/mq.rb +0 -30
  52. data/remove/api/npm_registry.rb +0 -169
  53. data/remove/api/pattern.rb +0 -33
  54. data/remove/api/remote_source_package.rb +0 -319
  55. data/remove/api/spider.rb +0 -47
  56. data/remove/cloner.rb +0 -154
  57. data/remove/db.rb +0 -267
  58. data/remove/license_auto +0 -4
  59. data/remove/misc.rb +0 -131
  60. data/remove/mq_pack.rb +0 -219
  61. data/remove/mq_repo.rb +0 -106
  62. data/remove/parser/gemfile_parser.rb +0 -221
  63. data/remove/parser/golang_parser.rb +0 -104
  64. data/remove/parser/gradle_parser.rb +0 -141
  65. data/remove/parser/manifest_parser.rb +0 -66
  66. data/remove/parser/maven_parser.rb +0 -91
  67. data/remove/parser/npm_parser.rb +0 -82
  68. data/remove/parser/pip_parser.rb +0 -1
  69. data/remove/parser/rebar_parser.rb +0 -61
  70. data/remove/recorder.rb +0 -184
  71. data/remove/script/apt-get.deps.sh +0 -37
  72. data/remove/script/debian_dpkg_list.sh +0 -2
  73. data/remove/script/rubygems.org.importdb.sh +0 -22
@@ -1,41 +0,0 @@
1
- require_relative '../api/github'
2
-
3
- module API
4
- class GoPkgIn
5
- attr_reader :repo_url, :protocol, :host, :owner, :repo, :ref
6
- # DOC: http://labix.org/gopkg.in#SupportedURLs
7
- # repo_url: https://gopkg.in/validator.v2 -> https://github.com/go-validator/validator/tree/v2
8
- def initialize(repo_url, db_ref=nil)
9
- @repo_url_perfix = 'https://github.com'
10
-
11
- repo_url_pattern = API::SOURCE_URL_PATTERN[:go_pkg_in]
12
- regex_group = repo_url_pattern.match(repo_url)
13
- @protocol = regex_group[:protocol]
14
- @host = regex_group[:host]
15
- @repo = regex_group[:repo]
16
- @owner = regex_group[:owner].nil? ? "go-#{@repo}" : regex_group[:owner]
17
-
18
- # TODO: follow DOC: ()branch/tag v3, v3.N, or v3.N.M)
19
- @ref = regex_group[:ref]
20
- @repo_url = "#{@repo_url_perfix}/#{@owner}/#{@repo}"
21
- end
22
-
23
- def last_commits
24
- g = API::Github.new(@repo_url, db_ref=@ref)
25
- g.last_commits
26
- end
27
-
28
- end
29
- end
30
-
31
- if __FILE__ == $0
32
- url = 'https://gopkg.in/validator.v2'
33
- g = API::GoPkgIn.new(url)
34
- p g.last_commits
35
- p g.protocol
36
- p g.host
37
- p g.owner
38
- p g.repo
39
- p g.ref
40
- p g.repo_url
41
- end
@@ -1,63 +0,0 @@
1
- require 'anemone'
2
-
3
- require_relative '../api/github'
4
- require_relative '../api/pattern'
5
- require_relative '../../lib/misc'
6
-
7
- module API
8
- class GolangOrg
9
- attr_reader :repo_url, :protocol, :host, :owner, :repo, :ref
10
- # DOC:
11
- # repo_url: https://golang.org/x/crypto
12
- def initialize(repo_url, db_ref=nil)
13
- @golang_import_url = repo_url
14
- golang_doc_prefix = 'https://godoc.org/'
15
- @golang_doc_url = "#{golang_doc_prefix}#{repo_url.gsub(/http[s]?:\/\//, '')}"
16
-
17
- repo_url_pattern = API::SOURCE_URL_PATTERN[:golang_org]
18
- regex_group = repo_url_pattern.match(repo_url)
19
- @protocol = regex_group[:protocol]
20
- @host = regex_group[:host]
21
- @repo = regex_group[:repo]
22
- @owner = regex_group[:owner].nil? ? "go-#{@repo}" : regex_group[:owner]
23
- @ref = nil
24
-
25
- @http_option = {}
26
- http_proxy = Misc.get_http_proxy
27
- if http_proxy
28
- @http_option[:proxy_host] = http_proxy[:addr]
29
- @http_option[:proxy_port] = http_proxy[:port]
30
- end
31
- @repo_url = get_repo_url
32
- end
33
-
34
- def get_repo_url
35
- opts = {:discard_page_bodies => true, :depth_limit => 0}.merge(@http_option)
36
- Anemone.crawl(@golang_doc_url, opts) do |anemone|
37
- anemone.on_every_page do |page|
38
- # $plog.debug(page.body)
39
- xpath = "//div[@id='x-projnav']/a[1]"
40
- target_link = page.doc.xpath(xpath)
41
- if target_link.size == 0
42
- raise "last_commit error: #{self}, #{@repo_url}"
43
- else
44
- # short_sha = target_link.text()
45
- href = target_link.attr('href').value
46
- @repo_url = href
47
- end
48
- end
49
- end
50
- $plog.debug("@golang_import_url: #{@golang_import_url}, @repo_url: #{@repo_url}")
51
- @repo_url
52
- end
53
-
54
- end
55
- end
56
-
57
- if __FILE__ == $0
58
- url = 'https://google.golang.org/cloud/compute'
59
- url = 'https://golang.org/x/crypto'
60
- g = API::GolangOrg.new(url)
61
- end
62
-
63
-
@@ -1,68 +0,0 @@
1
- require 'anemone'
2
-
3
- require_relative '../api/pattern'
4
- require_relative '../../lib/misc'
5
-
6
- module API
7
- class GoogleSourceCom
8
- attr_reader :repo_url, :protocol, :host, :owner, :repo, :ref
9
-
10
- # repo_url: https://go.googlesource.com/crypto
11
- def initialize(repo_url, db_ref=nil)
12
- @repo_url = repo_url
13
-
14
- repo_url_pattern = API::SOURCE_URL_PATTERN[:google_source_com]
15
- regex_group = repo_url_pattern.match(repo_url)
16
- @protocol = regex_group[:protocol]
17
- @host = regex_group[:host]
18
- @owner = regex_group[:owner]
19
- @repo = regex_group[:repo]
20
-
21
- @http_option = {}
22
- http_proxy = Misc.get_http_proxy
23
- if http_proxy
24
- @http_option[:proxy_host] = http_proxy[:addr]
25
- @http_option[:proxy_port] = http_proxy[:port]
26
- end
27
- @ref = db_ref
28
-
29
- end
30
-
31
- # URL.
32
- def last_commits
33
- last_commit = nil
34
- opts = {:discard_page_bodies => true, :depth_limit => 0}.merge(@http_option)
35
- commit_page = "#{@repo_url}"
36
- Anemone.crawl(commit_page, opts) do |anemone|
37
- anemone.on_every_page do |page|
38
- xpath = "//ol[@class='CommitLog']/li[1]/a[1]"
39
- target_link = page.doc.xpath(xpath)
40
- # p target_link
41
- if target_link.size == 0
42
- raise "last_commit error: #{self}, #{@repo_url}"
43
- else
44
- # full_href = text.attr('href')
45
- short_sha = target_link.text()
46
- full_sha = target_link.attr('href').value
47
- last_commit = {
48
- 'sha' => full_sha.split('/+/').last
49
- }
50
- end
51
- end
52
- end
53
- last_commit
54
- end
55
- end
56
- end
57
-
58
-
59
- if __FILE__ == $0
60
- url = 'https://go.googlesource.com/crypto'
61
- g = API::GoogleSourceCom.new(url)
62
- p g.last_commits
63
- # p g.protocol
64
- # p g.host
65
- # p g.owner
66
- # p g.repo
67
- # p g.repo_url
68
- end
@@ -1,41 +0,0 @@
1
- module API
2
- class Gradle
3
- def initialize(gradle_file)
4
- @gradle_file = gradle_file
5
- @websites = {
6
- mvn: 'http://mvnrepository.com/',
7
- maven: 'https://maven-repository.com/search',
8
- rpmfind: 'http://www.rpmfind.net/'
9
- }
10
-
11
- end
12
-
13
- # DOC: http://pkaq.github.io/gradledoc/docs/userguide/ch11/tutorial_gradle_command_line.html#sec:listing_dependencies
14
- def list_dependencies()
15
- deps = {
16
- :pack_name_1 => 'pack_version_1',
17
- :pack_name_2 => 'pack_version_2'
18
- }
19
- end
20
-
21
- # JSON: http://search.maven.org/solrsearch/select?q=g:%22org.apache.commons%22&rows=20&wt=json
22
- # JSON: http://search.maven.org/solrsearch/select?q=g:%22org.apache.commons%22%20AND%20a:%22commons-lang3%22&rows=2000&wt=json
23
- # JSON: http://search.maven.org/solrsearch/select?q=g:%22org.apache.commons%22%20AND%20a:%22commons-lang3%22%20AND%20v:%223.0%22&rows=2000&wt=json
24
- # http://search.maven.org/solrsearch/select?q=g:%22org.apache.maven.indexer%22+AND+a:%22maven-indexer%22&rows=20&core=gav
25
-
26
- def fetch_license_info_from_website()
27
- license = nil
28
- license_url = nil
29
- license_text = nil
30
- source_code_download_url = nil
31
- source_package_page_link = nil
32
- license_info = {
33
- license: license,
34
- license_url: license_url,
35
- license_text: license_text,
36
- source_url: source_code_download_url,
37
- homepage: source_package_page_link
38
- }
39
- end
40
- end
41
- end
data/remove/api/helper.rb DELETED
@@ -1,26 +0,0 @@
1
- require_relative './pattern'
2
- module API
3
-
4
- class Helper
5
- def self.is_license_file(filename)
6
- return filename =~ API::FILE_NAME_PATTERN[:license_file]
7
- end
8
-
9
- # file_pathname = 'foo/to/bar'
10
- def self.is_root_file(file_pathname)
11
- return file_pathname.split('/').size == 2
12
- end
13
-
14
- def self.is_readme_file(filename)
15
- return filename =~ API::FILE_NAME_PATTERN[:readme_file]
16
- end
17
-
18
- def self.is_notice_file(filename)
19
- return filename =~ API::FILE_NAME_PATTERN[:notice_file]
20
- end
21
- def self.is_debian_copyright_file(file_pathname)
22
- return file_pathname =~ API::FILE_NAME_PATTERN[:debian_copyright_file]
23
- end
24
- end
25
-
26
- end
@@ -1,23 +0,0 @@
1
- module API
2
- class JCenter
3
-
4
- # Site: https://bintray.com/bintray/jcenter
5
- # DOC: https://bintray.com/docs/api/
6
- def initialize(group, name, version)
7
- end
8
-
9
- def get_license_info()
10
- {
11
- license: nil,
12
- license_text: nil,
13
- source_url: nil,
14
- homepage: nil,
15
-
16
- }
17
- end
18
-
19
-
20
- end
21
- end
22
-
23
-
@@ -1,192 +0,0 @@
1
- require 'json'
2
- require 'nokogiri'
3
- require 'httparty'
4
- require_relative '../../config/config'
5
-
6
- module API
7
- # DOC: http://search.maven.org/#api
8
- class MavenCentralRepository
9
- def initialize(group_id, artifact_id, version)
10
- @group_id = group_id
11
- @artifact_id = artifact_id
12
- @version = version
13
- # @scope = scope
14
- # TODO: what is this field?
15
- @classifier = ''
16
-
17
- @api_url = "http://search.maven.org/solrsearch/select?q=g%3A%22com.google.inject%22&rows=20&wt=json"
18
- end
19
-
20
- # DOC: http://search.maven.org/solrsearch/select?q=g:"com.google.inject"+AND+a:"guice"&core=gav&rows=20&wt=json
21
- def select()
22
- url = "http://search.maven.org/solrsearch/select?q=g:\"#{@group_id}\"+AND+a:\"#{@artifact_id}\"&core=gav&rows=20&wt=json"
23
- url = URI.escape(url)
24
- $plog.debug(url)
25
- response = HTTParty.get(url)
26
- if response.code == 200
27
- query_set = JSON.parse(response.licenses)
28
- else
29
- raise "CentralRepository select error: #{response}"
30
- end
31
- end
32
-
33
- # Eg: http://search.maven.org/solrsearch/select?q=g:%22com.google.inject%22%20AND%20a:%22guice%22%20AND%20v:%223.0%22%20AND%20l:%22javadoc%22%20AND%20p:%22jar%22&rows=20&wt=json
34
- def advance_search
35
- url = "http://search.maven.org/solrsearch/select?q=g:\"#{@group_id}\" AND a:\"#{@artifact_id}\" AND v:\"#{@version}\" AND l:\"#{@classifier}\" AND p:\"jar\"&rows=20&wt=json"
36
- url = URI.escape(url)
37
- $plog.debug("api_url: #{url}")
38
- response = HTTParty.get(url)
39
- if response.code == 200
40
- query_set = JSON.parse(response.licenses)
41
- else
42
- raise "CentralRepository select error: #{response}"
43
- end
44
- end
45
-
46
- # POM: https://repo1.maven.org/maven2/com/google/inject/guice/3.0/guice-3.0.pom
47
- def get_package_pom(group, name, version)
48
- pom = nil
49
-
50
- central_prefix = 'https://repo1.maven.org/maven2'
51
- central_body = [group.gsub(/\./, '/'), name, version].join('/')
52
- central_tail = "#{name}-#{version}.pom"
53
- pom_url = [central_prefix, central_body, central_tail].join('/')
54
- # http://stackoverflow.com/questions/25814210/opensslsslsslerror-ssl-connect-syscall-returned-5-errno-0-state-sslv3-read
55
- opts = {
56
- :ssl_version => 'TLSv1'
57
- }
58
- response = HTTParty.get(pom_url, options=opts)
59
- if response.code == 200
60
- $plog.debug("pom_url: #{pom_url}")
61
- pom = response.licenses
62
- else
63
- $plog.error("CentralRepository get_package_pom error: pom_url: #{pom_url}, #{response}")
64
- end
65
- return pom_url, pom
66
- end
67
-
68
- def get_license_info()
69
- license_info = {
70
- homepage: nil,
71
- source_url: nil,
72
- licenses: [],
73
- project_url: nil,
74
- pom_url: nil
75
- }
76
- query_set = select
77
-
78
- if query_set
79
- query_set['response']['docs'].each {|d|
80
- version = d['v']
81
- ec = d['ec']
82
- if version == @version && ec.index('.pom')
83
- license_info[:project_url] = "https://maven-repository.com/artifact/#{@group_id}/#{@artifact_id}/#{@version}"
84
- pom_url, pom = get_package_pom(@group_id, @artifact_id, @version)
85
- if pom
86
- license_info[:pom_url] = pom_url
87
-
88
- # $plog.debug("pom: #{pom}")
89
- doc = Nokogiri::XML(pom).remove_namespaces!
90
-
91
- source_code_node = doc.xpath("/project/scm/url")
92
- license_info[:source_url] = source_code_node.text if source_code_node
93
-
94
- homepage_node = doc.xpath("/project/url")
95
- license_info[:homepage] = homepage_node.text if homepage_node
96
-
97
- xpath = "//licenses/license"
98
- licenses = doc.xpath(xpath)
99
- $plog.debug("licenses: #{licenses.to_xml}")
100
-
101
- # Multi licenses: https://maven-repository.com/artifact/org.cryptacular/cryptacular/1.0
102
- licenses.each {|node|
103
- license = nil
104
- license_url = nil
105
- license_text = nil
106
- if node.xpath(".//name")
107
- license = node.xpath(".//name").text
108
- # $plog.debug("license: #{license}")
109
- end
110
- license_url = node.xpath(".//url").text if licenses.xpath(".//url")
111
-
112
- # TODO: find a license_text demo
113
- # license_text = licenses.xpath(".//text").text if licenses.xpath(".//text")
114
-
115
- license_info[:licenses] << {
116
- license: license,
117
- license_url: license_url,
118
- license_text: license_text
119
- }
120
- }
121
- if licenses.size == 0
122
- $plog.debug("licenses.size: #{licenses.size}")
123
- # Comment license text info: eg. https://repo1.maven.org/maven2/commons-io/commons-io/2.4/commons-io-2.4.pom
124
- comment_node = doc.xpath("/comment()[contains(., 'license')]")
125
- if comment_node.size > 0
126
- license_text = comment_node.to_xml
127
- license_info[:licenses] << {
128
- license: 'UNKNOWN',
129
- license_url: nil,
130
- license_text: license_text
131
- }
132
- end
133
- end
134
- end
135
-
136
- break
137
- end
138
- }
139
- end
140
- license_info
141
- end
142
- end
143
- end
144
-
145
- if __FILE__ == $0
146
- # One license
147
- # item = {:group=>"net.sourceforge.nekohtml", :name=>"nekohtml", :version=>"1.9.20"}
148
-
149
- # Two license
150
- # item = {:group=>"org.cryptacular", :name=>"cryptacular", :version=>"1.0"}
151
-
152
- # License in comments
153
- item = {:group=>"commons-io", :name=>"commons-io", :version=>"2.4"}
154
-
155
-
156
- 'aopalliance:aopalliance:1.0'
157
- 'cglib:cglib-nodep:3.1'
158
- 'com.beust:jcommander:1.48'
159
- 'com.google.guava:guava:18.0'
160
- 'com.google.inject:guice:no_aop:4.0'
161
- 'com.jayway.awaitility:awaitility:1.6.3'
162
- 'javax.inject:javax.inject:1'
163
- 'junit:junit:4.10'
164
- 'org.apache.ant:ant:1.7.0'
165
- 'org.apache.ant:ant-launcher:1.7.0'
166
- 'org.assertj:assertj-core:3.1.0'
167
- 'org.beanshell:bsh:2.0b4'
168
- 'org.hamcrest:hamcrest-core:1.3'
169
- 'org.hamcrest:hamcrest-library:1.3'
170
- 'org.objenesis:objenesis:2.1'
171
- 'org.testng:testng:6.9.6'
172
- 'org.yaml:snakeyaml:1.15'
173
-
174
- begin
175
- i = 'javax.inject:javax.inject:1'
176
- g, a, v = i.split(':')
177
- c = API::MavenCentralRepository.new(g, a, v)
178
-
179
- license_info = c.get_license_info
180
- $plog.debug("license_info #{license_info}")
181
-
182
- if license_info[:licenses].size == 1
183
- elsif license_info[:licenses].size > 1
184
- $plog.debug("#{g}:#{a}:#{v} -- has multi license")
185
- end
186
-
187
- rescue Exception => e
188
- $plog.error("#{g}:#{a}:#{v}, #{e}")
189
- end
190
-
191
- $plog.info("results: #{license_info}")
192
- end