license_auto 0.1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +47 -0
  3. data/.travis.yml +9 -0
  4. data/CHANGELOG.md +17 -0
  5. data/Gemfile +27 -0
  6. data/Gemfile.lock +103 -0
  7. data/LICENSE +23 -0
  8. data/README.md +136 -0
  9. data/Rakefile +20 -0
  10. data/examples/get_license_files_of_bundler.rb +12 -0
  11. data/lib/api.rb +60 -0
  12. data/lib/api/bitbucket.rb +142 -0
  13. data/lib/api/code_google_com.rb +66 -0
  14. data/lib/api/excel_export.rb +189 -0
  15. data/lib/api/gem_data.rb +30 -0
  16. data/lib/api/git_kernel_org.rb +59 -0
  17. data/lib/api/github.rb +376 -0
  18. data/lib/api/go_pkg_in.rb +41 -0
  19. data/lib/api/golang_org.rb +63 -0
  20. data/lib/api/google_source_com.rb +68 -0
  21. data/lib/api/gradle2.rb +41 -0
  22. data/lib/api/helper.rb +26 -0
  23. data/lib/api/j_center.rb +23 -0
  24. data/lib/api/maven_central_repository.rb +192 -0
  25. data/lib/api/mq.rb +30 -0
  26. data/lib/api/npm_registry.rb +169 -0
  27. data/lib/api/pattern.rb +33 -0
  28. data/lib/api/remote_source_package.rb +319 -0
  29. data/lib/api/spider.rb +47 -0
  30. data/lib/cloner.rb +154 -0
  31. data/lib/db.rb +267 -0
  32. data/lib/license_auto.rb +20 -0
  33. data/lib/license_auto/config/config.rb +52 -0
  34. data/lib/license_auto/errors.rb +0 -0
  35. data/lib/license_auto/license/frequency.rb +30 -0
  36. data/lib/license_auto/license/similarity.rb +247 -0
  37. data/lib/license_auto/license/templates/AFL2.0.txt +43 -0
  38. data/lib/license_auto/license/templates/AFL2.1.txt +47 -0
  39. data/lib/license_auto/license/templates/AFL3.0.txt +45 -0
  40. data/lib/license_auto/license/templates/AGPL3.0.txt +236 -0
  41. data/lib/license_auto/license/templates/APSL 2.0.txt +100 -0
  42. data/lib/license_auto/license/templates/Apache1.0.txt +18 -0
  43. data/lib/license_auto/license/templates/Apache1.1.txt +18 -0
  44. data/lib/license_auto/license/templates/Apache2.0.txt +201 -0
  45. data/lib/license_auto/license/templates/Artistic1.0.txt +45 -0
  46. data/lib/license_auto/license/templates/Artistic2.0.txt +70 -0
  47. data/lib/license_auto/license/templates/BSD.txt +29 -0
  48. data/lib/license_auto/license/templates/CDDL1.0.txt +1 -0
  49. data/lib/license_auto/license/templates/CDDL1.1.txt +209 -0
  50. data/lib/license_auto/license/templates/CPL1.0.txt +211 -0
  51. data/lib/license_auto/license/templates/CPOL 1.02.txt +41 -0
  52. data/lib/license_auto/license/templates/ClarifiedArtistic.txt +140 -0
  53. data/lib/license_auto/license/templates/Creative Commons1.0.txt +28 -0
  54. data/lib/license_auto/license/templates/EPL1.0.txt +70 -0
  55. data/lib/license_auto/license/templates/ERLANG1.1.txt +286 -0
  56. data/lib/license_auto/license/templates/GFDL1.1.txt +101 -0
  57. data/lib/license_auto/license/templates/GFDL1.2.txt +108 -0
  58. data/lib/license_auto/license/templates/GFDL1.3.txt +116 -0
  59. data/lib/license_auto/license/templates/GPL1.0.txt +105 -0
  60. data/lib/license_auto/license/templates/GPL2.0.txt +340 -0
  61. data/lib/license_auto/license/templates/GPL3.0.txt +220 -0
  62. data/lib/license_auto/license/templates/H2.txt +118 -0
  63. data/lib/license_auto/license/templates/IBMPublic1.0.txt +58 -0
  64. data/lib/license_auto/license/templates/LGPL2.0.txt +481 -0
  65. data/lib/license_auto/license/templates/LGPL2.1.txt +166 -0
  66. data/lib/license_auto/license/templates/LGPL3.0.txt +56 -0
  67. data/lib/license_auto/license/templates/Lucent Public License 1.02.txt +244 -0
  68. data/lib/license_auto/license/templates/MIT.txt +21 -0
  69. data/lib/license_auto/license/templates/MIT2.0.txt +11 -0
  70. data/lib/license_auto/license/templates/MPL 2.0.txt +373 -0
  71. data/lib/license_auto/license/templates/MPL1.1.txt +470 -0
  72. data/lib/license_auto/license/templates/MS-LPL.txt +22 -0
  73. data/lib/license_auto/license/templates/MS-PL.txt +20 -0
  74. data/lib/license_auto/license/templates/MS-RL.txt +19 -0
  75. data/lib/license_auto/license/templates/NPL1.0.txt +138 -0
  76. data/lib/license_auto/license/templates/OpenSSL.txt +127 -0
  77. data/lib/license_auto/license/templates/Oracle license.txt +69 -0
  78. data/lib/license_auto/license/templates/PublicDomain.txt +7 -0
  79. data/lib/license_auto/license/templates/Python.txt +15 -0
  80. data/lib/license_auto/license/templates/QPL1.0.txt +45 -0
  81. data/lib/license_auto/license/templates/RubyClause-6.txt +56 -0
  82. data/lib/license_auto/license/templates/SQLite Copyright.txt +33 -0
  83. data/lib/license_auto/license/templates/Sleepycat.txt +133 -0
  84. data/lib/license_auto/license/templates/SunPublic1.0.txt +183 -0
  85. data/lib/license_auto/license/templates/WTFPL license.txt +13 -0
  86. data/lib/license_auto/license_info.rb +12 -0
  87. data/lib/license_auto/matcher.rb +100 -0
  88. data/lib/license_auto/package.rb +73 -0
  89. data/lib/license_auto/parser/gemfile.rb +0 -0
  90. data/lib/license_auto/parser/golang.rb +0 -0
  91. data/lib/license_auto/parser/gradle.rb +0 -0
  92. data/lib/license_auto/version.rb +20 -0
  93. data/lib/license_auto/website.rb +11 -0
  94. data/lib/license_auto/website/github.rb +49 -0
  95. data/lib/license_auto/website/ruby_gems_org.rb +79 -0
  96. data/lib/license_auto/website/ruby_gems_org_db.rb +52 -0
  97. data/lib/license_auto/website/rubydoc_info.rb +12 -0
  98. data/lib/message.rb +5 -0
  99. data/lib/misc.rb +131 -0
  100. data/lib/parser/enums.rb +161 -0
  101. data/lib/parser/gemfile_parser.rb +221 -0
  102. data/lib/parser/golang_parser.rb +104 -0
  103. data/lib/parser/gradle_parser.rb +141 -0
  104. data/lib/parser/manifest_parser.rb +66 -0
  105. data/lib/parser/maven_parser.rb +91 -0
  106. data/lib/parser/npm_parser.rb +82 -0
  107. data/lib/parser/pip_parser.rb +1 -0
  108. data/lib/parser/rebar_parser.rb +61 -0
  109. data/lib/recorder.rb +184 -0
  110. data/lib/script/apt-get.deps.sh +37 -0
  111. data/lib/script/debian_dpkg_list.sh +2 -0
  112. data/lib/script/rubygems.org.importdb.sh +22 -0
  113. data/license_auto.gemspec +44 -0
  114. data/remove/license_auto +4 -0
  115. data/remove/mq_pack.rb +219 -0
  116. data/remove/mq_repo.rb +106 -0
  117. metadata +249 -0
data/lib/db.rb ADDED
@@ -0,0 +1,267 @@
1
+ require 'httparty'
2
+ require_relative '../config/config'
3
+ require_relative '../lib/message'
4
+
5
# Looks up the product named +product_name+ and inserts it when the lookup
# does not yield exactly one row.
#
# @param product_name [String] value matched against product.name
# @return [Array, nil] [false, row] when exactly one row already exists,
#   [true, row] when the insert returned one row, nil otherwise
def add_product(product_name)
  existing = $conn.exec_params("select * from product where name = $1", [product_name])
  return false, existing[0] if existing.ntuples == 1

  inserted = $conn.exec_params("insert into product (name) select $1 returning *", [product_name])
  [true, inserted[0]] if inserted.ntuples == 1
end
16
+
17
# Looks up a repo by name and source URL and inserts it when the lookup does
# not yield exactly one row.
#
# @param repo_name [String] value matched against repo.name
# @param source_url [String] value matched against repo.source_url
# @param parent_repo_id [Object, nil] stored in repo.parent_repo_id on insert
# @param priv [Integer] stored in repo.priv on insert (defaults to -1)
# @return [Array, nil] [false, row] when exactly one row already exists,
#   [true, row] when the insert returned one row, nil otherwise
def add_repo(repo_name, source_url, parent_repo_id=nil, priv=-1)
  existing = $conn.exec_params("select * from repo where name = $1 and source_url = $2", [repo_name, source_url])
  return false, existing[0] if existing.ntuples == 1

  inserted = $conn.exec_params(
    "insert into repo (name, source_url, priv, parent_repo_id) select $1, $2, $3, $4 returning *",
    [repo_name, source_url, priv, parent_repo_id]
  )
  [true, inserted[0]] if inserted.ntuples == 1
end
29
+
30
# Fetches the source_url column of the repo with the given id.
#
# @param repo_id [Object] value matched against repo.id
# @return [Object, nil] the source_url value, or nil unless exactly one row
#   matched
def api_get_repo_source_url(repo_id)
  rows = $conn.exec_params("select source_url from repo where id = $1", [repo_id])
  rows.ntuples == 1 ? rows[0]['source_url'] : nil
end
39
+
40
# Deletes the product_repo_pack rows that belong to the given release/repo
# pair and whose pack has status < 41, then logs a warning about the rerun.
#
# @param release_id [Object] matched against product_repo.release_id
# @param repo_id [Object] matched against product_repo.repo_id
# @return [Object] whatever $plog.warn returns
def api_clear_relations(release_id, repo_id)
  $conn.exec_params("
    delete from product_repo_pack
    where id in (
    select r.id from product_repo_pack r
    join pack on pack.id = r.pack_id
    where product_repo_id in (
    select id from product_repo where
    release_id = $1
    and repo_id = $2
    )
    and pack.status < 41
    )", [release_id, repo_id])
  $plog.warn("You are rerunning the repo's deps, the history relation table data of this repo was deleted")
end
55
+
56
# Fetches the product_repo row ("case") with the given id.
#
# @param case_id [Object] value matched against product_repo.id
# @return [Object, nil] the row carrying product_id, release_id and repo_id,
#   or nil unless exactly one row matched
def api_get_case_by_id(case_id)
  rows = $conn.exec_params("select product_id, release_id, repo_id from product_repo where id = $1", [case_id])
  rows.ntuples == 1 ? rows[0] : nil
end
64
+
65
# Selects every product_repo row for the given release/repo pair.
#
# @param release_id [Object] matched against product_repo.release_id
# @param repo_id [Object] matched against product_repo.repo_id
# @return [Object] the raw query result
def api_query_product_repo(release_id, repo_id)
  $conn.exec_params("
    select * from product_repo
    where release_id = $1
    and repo_id = $2", [release_id, repo_id])
end
71
+
72
# Selects the ymls column of the repo with the given id.
#
# @param repo_id [Object] value matched against repo.id
# @return [Object] the raw query result
def api_get_repo_manifest_file_list(repo_id)
  $conn.exec_params("select ymls from repo where id = $1", [repo_id])
end
75
+
76
# Registers +sub_repo_id+ under every product that already has
# +parent_repo_id+ in the given release, skipping products for which the
# (release, product, sub repo) row already exists.
#
# Failures for one product are logged with $plog.fatal and do not stop the
# remaining products. Only StandardError is rescued, so signals and exit
# requests (Interrupt, SystemExit, ...) still propagate to the caller.
#
# @param release_id [Object] matched against product_repo.release_id
# @param parent_repo_id [Object] repo whose products are enumerated
# @param sub_repo_id [Object] repo to attach to each of those products
# @return [Object] the result of the initial product_repo query
def api_add_product_repo(release_id, parent_repo_id, sub_repo_id)
  all_products = $conn.exec_params("
    select * from product_repo
    where release_id = $1
    and repo_id = $2", [release_id, parent_repo_id])
  all_products.each do |p|
    product_id = p['product_id']
    begin
      existing = $conn.exec_params(
        "select * from product_repo where release_id = $1 and product_id = $2 and repo_id = $3",
        [release_id, product_id, sub_repo_id]
      )
      next unless existing.ntuples == 0

      $plog.debug("release_id: #{release_id}, repo_id: #{sub_repo_id}, parent_repo_id: #{parent_repo_id}")
      $conn.exec_params("insert into product_repo (release_id, product_id, repo_id) values ($1, $2, $3)",
                        [release_id, product_id, sub_repo_id])
    rescue StandardError => e
      $plog.fatal(e)
    end
  end
end
97
+
98
# Calls the add_product_repo_pack SQL function for the given ids.
#
# @param repo_id [Object] first argument of the SQL function
# @param pack_id [Object] second argument of the SQL function
# @param release_id [Object] third argument of the SQL function
# @return [Object] the first row of the result
def api_add_product_repo_pack(repo_id, pack_id, release_id)
  rows = $conn.exec_params("select add_product_repo_pack($1, $2, $3)", [repo_id, pack_id, release_id])
  rows[0]
end
104
+
105
# Calls the add_pack SQL function and returns its (pack_id, is_newbie) row.
#
# Example of the underlying call:
#   select * from add_pack('goose', 'unknown', 'Golang', null, null, null, null, null) as t(pack_id integer, new bool)
#
# All nine arguments are forwarded to add_pack in the order they are declared.
#
# @return [Object, nil] the single result row, or nil unless exactly one row
#   came back
def api_add_pack(pack_name, pack_version, lang, homepage, source_url, license, status, cmt, project_url=nil)
  args = [pack_name, pack_version, lang, homepage, source_url, license, status, cmt, project_url]
  rows = $conn.exec_params(
    "select * from add_pack($1, $2, $3, $4, $5, $6, $7, $8, $9) as t(pack_id integer, is_newbie bool)",
    args
  )
  rows.ntuples == 1 ? rows[0] : nil
end
116
+
117
# Asks the license website for the completion ratio of a release/repo pair
# and sends a notification once the ratio reaches 0.95.
#
# NOTE(review): the ratio is read through `response.licenses`; confirm that
# the endpoint's payload really exposes it under that name.
# NOTE(review): release_name and repo_source_url are still nil placeholders,
# so the notification text currently carries empty names.
#
# @param release_id [Object] sent as the release_id query parameter
# @param repo_id [Object] sent as the repo_id query parameter
# @return [Float] the reported ratio, or 0.0 when the response code is not 200
def api_get_complete_ratio(release_id, repo_id)
  # TODO: port
  api_path = '/api/v1/repo/complete_ratio'
  api_url = "#{LICENSE_WEBSITE_URL}#{api_path}"
  query = { :release_id => release_id, :repo_id => repo_id }
  response = HTTParty.get(api_url, :query => query)

  unless response.code == 200
    $plog.error("#{response}")
    return 0.0
  end

  ratio = response.licenses.to_f
  $plog.debug("complete_ratio: #{ratio}")
  if ratio >= 0.95
    release_name = nil
    repo_source_url = nil
    content = "Your release: #{release_name}, repo: #{repo_source_url} has completed, check it please."
    Message.send(content)
  end
  ratio
end
141
+
142
# Fetches the pack row with the given id.
#
# @param pack_id [Object] value matched against pack.id
# @return [Object, nil] the row (id, name, version, source_url, lang,
#   homepage, license, license_text, status), or nil unless exactly one row
#   matched
def api_get_pack_by_id(pack_id)
  rows = $conn.exec_params("select id, name, version, source_url, lang, homepage, license, license_text, status from pack
    where id = $1", [pack_id])
  rows.ntuples == 1 ? rows[0] : nil
end
151
+
152
# Selects rows from std_license, optionally filtered by a raw SQL clause.
#
# WARNING: +where+ is interpolated into the SQL text verbatim, so it must
# never contain untrusted input.
#
# @param where [String] complete SQL clause appended after the table name
# @return [Object] the raw query result
def api_get_std_license_name(where='where 1 = 1')
  $conn.exec("select * from std_license #{where}")
end
155
+
156
# Updates the status and comment of a pack and stamps update_at with now().
#
# The comment is cut to its first 80 characters before it is stored. A nil
# comment is stored as-is instead of raising on the slice.
#
# @param pack_id [Object] value matched against pack.id
# @param status [Object] new value for pack.status
# @param cmt [String, nil] comment; logged in full, stored truncated
# @return [Object] the raw query result
def api_setup_pack_status(pack_id, status, cmt)
  $plog.debug(cmt)
  stored_cmt = cmt.nil? ? nil : cmt[0..79]
  $conn.exec_params("update pack set status = $1, cmt = $2, update_at = now() where id = $3",
                    [status, stored_cmt, pack_id])
end
160
+
161
# Updates the status and comment of a product_repo row ("case") and stamps
# update_at with now().
#
# The comment is cut to its first 80 characters before it is stored. A nil
# comment is stored as-is instead of raising on the slice.
#
# @param repo_id [Object] despite its name, this value is matched against
#   product_repo.id
# @param status [Object] new value for product_repo.status
# @param cmt [String, nil] comment; logged in full, stored truncated
# @return [Object] the raw query result
def api_setup_case_status(repo_id, status, cmt)
  $plog.debug(cmt)
  stored_cmt = cmt.nil? ? nil : cmt[0..79]
  $conn.exec_params("update product_repo set status = $1, cmt = $2, update_at = now() where id = $3",
                    [status, stored_cmt, repo_id])
end
165
+
166
+
167
# Calls the update_pack SQL function with the fields of +pack+.
#
# The function's return value is read from the first column of the first
# result row and compared numerically with -1. (Comparing the whole row
# object with -1 can never match, which made every call report success.)
#
# @param pack_id [Object] first argument of update_pack
# @param pack [Hash] symbol-keyed attributes: :version, :homepage,
#   :source_url, :license_url, :license, :unclear_license, :license_text,
#   :status, :project_url
# @return [Boolean] false when update_pack returned -1, true otherwise
def api_update_pack_info(pack_id, pack)
  fields = [:version, :homepage, :source_url, :license_url, :license,
            :unclear_license, :license_text, :status, :project_url]
  r = $conn.exec_params("select update_pack($1,$2,$3,$4,$5,$6,$7,$8,$9,$10)",
                        [pack_id] + fields.map { |key| pack[key] })
  # to_i copes with both the default text result ("-1") and a type-mapped Integer.
  r.getvalue(0, 0).to_i != -1
end
180
+
181
# Selects the pack rows matching a name, version and language.
#
# @param name [Object] matched against pack.name
# @param version [Object] matched against pack.version
# @param lang [Object] matched against pack.lang
# @return [Object] the raw query result
def api_get_packs_by_name(name, version, lang)
  $conn.exec_params("select * from pack where name = $1 and version = $2 and lang = $3", [name, version, lang])
end
185
+
186
# Queries the rubygems database ($gemconn) for the latest 'ruby'-platform
# version of a gem, together with its home/code links and licenses.
#
# @param name [String] matched against rubygems.name
# @return [Object] the raw query result
def api_get_gemdata_by_name(name)
  $gemconn.exec_params("select rubygems.name, versions.number, linksets.home, linksets.code, versions.licenses
    from rubygems, versions, linksets
    where rubygems.id = versions.rubygem_id
    and rubygems.id = linksets.rubygem_id
    and rubygems.name = $1
    and versions.latest = true
    and versions.platform = $2", [name, 'ruby'])
end
195
+
196
# Queries the rubygems database ($gemconn) for one specific version of a
# gem, together with its home/code links and licenses.
#
# @param name [String] matched against rubygems.name
# @param version [String] matched against versions.number
# @return [Object] the raw query result
def api_get_gemdata_by_name_and_version(name, version)
  $gemconn.exec_params("select rubygems.name, versions.number, linksets.home, linksets.code, versions.licenses
    from rubygems, versions, linksets
    where rubygems.id = versions.rubygem_id
    and rubygems.id = linksets.rubygem_id
    and rubygems.name = $1
    and versions.number = $2", [name, version])
end
204
+
205
# Collects the pack/license details of every package linked to a product in
# a given release.
#
# @param name [String] matched against product.name
# @param release_name [String] matched against release_tbl.name
# @param release_version [String] matched against release_tbl.version
# @return [Object, nil] the query result, or nil when it has no rows
def api_get_template_result_by_product(name, release_name, release_version)
  rows = $conn.exec_params("select product.name, repo.name, pack.name, pack.version, pack.unclear_license, pack.license, pack.license_text, pack.source_url
    from product_repo_pack
    join pack on product_repo_pack.pack_id = pack.id
    join product_repo on product_repo_pack.product_repo_id = product_repo.id
    join repo on product_repo.repo_id = repo.id
    join product on product_repo.product_id = product.id
    join release_tbl on product_repo.release_id = release_tbl.id
    where product.name = $1
    and release_tbl.name = $2
    and release_tbl.version = $3", [name, release_name, release_version])
  rows.ntuples > 0 ? rows : nil
end
222
+
223
# Lists the product_repo ids and repo names that belong to a product in a
# given release.
#
# @param name [String] matched against product.name
# @param release_name [String] matched against release_tbl.name
# @param release_version [String] matched against release_tbl.version
# @return [Object, nil] the query result, or nil when it has no rows
def api_get_repo_list_by_product(name, release_name, release_version)
  rows = $conn.exec_params("select product_repo.id, repo.name
    from product_repo
    join product on product_repo.product_id = product.id
    join repo on product_repo.repo_id = repo.id
    join release_tbl on product_repo.release_id = release_tbl.id
    where product.name = $1
    and release_tbl.name = $2
    and release_tbl.version = $3", [name, release_name, release_version])
  rows.ntuples > 0 ? rows : nil
end
238
+
239
# Collects the pack/license details of every package linked to one
# product_repo row.
#
# @param id [Object] matched against product_repo_pack.product_repo_id
# @return [Object, nil] the query result, or nil when it has no rows
def api_get_template_result_by_product_repo_id(id)
  rows = $conn.exec_params("select pack.name, pack.version, pack.unclear_license, pack.license, pack.license_text, pack.source_url
    from product_repo_pack
    join pack on product_repo_pack.pack_id = pack.id
    where product_repo_pack.product_repo_id = $1", [id])
  rows.ntuples > 0 ? rows : nil
end
250
+
251
# Selects the source_url column of the pack with the given id.
#
# @param pack_id [Object] value matched against pack.id
# @return [Object] the raw query result
def api_get_manifest_download_url(pack_id)
  $conn.exec_params("select source_url from pack where id = $1", [pack_id])
end
254
+
255
# Selects the whitelist_orgs rows whose url equals +org_url+.
#
# @param org_url [String] e.g. github.com/your_org_or_person_name
# @return [Object] the raw query result
def api_get_whitelist_orgs(org_url)
  $conn.exec_params("select * from whitelist_orgs where url = $1", [org_url])
end
259
+
260
# Selects the repo rows whose source_url equals +url+.
#
# @param url [String] value matched against repo.source_url
# @return [Object] the raw query result
def api_get_repo_by_url(url)
  $conn.exec_params("select * from repo where source_url = $1", [url])
end
263
+
264
# Manual smoke test: only runs when this file is executed directly.
if $PROGRAM_NAME == __FILE__
  # p api_get_repo_manifest_file_list(80).values[0]
  api_get_complete_ratio(1, 2)
end
@@ -0,0 +1,20 @@
1
+ require 'pathname'
2
+
3
# Top-level namespace of the gem; reopened by the files required below.
module LicenseAuto
end

# Make both this directory and its license_auto sibling path requirable.
# The directory of this file is unshifted last, so it is searched first.
lib_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(lib_dir, '..', 'license_auto'))
$LOAD_PATH.unshift(lib_dir)
10
+
11
+ # puts $LOAD_PATH
12
+ # puts Pathname.new(__FILE__).dirname.join("license_finder")
13
+
14
+ # require 'license_auto/config/config'
15
+
16
+ require 'license_auto/website'
17
+ require 'license_auto/matcher'
18
+ require 'license_auto/package'
19
+ require 'license_auto/license_info'
20
+
@@ -0,0 +1,52 @@
1
+ require 'log4r'
2
+
3
+ $debug = true
4
+
5
# Cache locations used by the tool.
AUTO_ROOT = '/tmp/license_auto_cache'
LAUNCHPAD_SOURCE_DIR = "#{AUTO_ROOT}/lp"
MANIFEST_SOURCE_DIR = "#{AUTO_ROOT}/manifest"

# Create the cache tree when this file is loaded; the root comes first because
# the other two live inside it. File.exist? is used throughout because
# File.exists? is no longer available on current Ruby versions.
[AUTO_ROOT, LAUNCHPAD_SOURCE_DIR, MANIFEST_SOURCE_DIR].each do |dir|
  Dir.mkdir(dir) unless File.exist?(dir)
end
17
+
18
STD_LICENSE_DIR = "./extractor_ruby/Package_license"

# Formatter shared by the console and the file outputter.
pf = Log4r::PatternFormatter.new(
  pattern: "%d [%l]: %M",
  date_format: "%Y/%m/%d %H:%M:%S"
)

HTTPARTY_DOWNLOAD_TIMEOUT = 480

Log4r::StderrOutputter.new('console', formatter: pf)

# In debug mode the log goes to ./auto.log (and is truncated, see :trunc
# below); otherwise to a randomly numbered file under AUTO_ROOT.
filename = $debug ? "auto.log" : "#{AUTO_ROOT}/#{rand(1000).to_s}.log"

Log4r::FileOutputter.new('logfile',
                         filename: filename,
                         trunc: $debug,
                         formatter: pf,
                         level: Log4r::DEBUG)

# Global logger used throughout the project; writes to both outputters.
$plog = Log4r::Logger.new('auto.log')
$plog.add('console', 'logfile')
43
+
44
# Demonstrates $plog by emitting one message at each of the five levels,
# from DEBUG up to FATAL.
#
# @return [Object] whatever the final $plog.fatal call returns
def log_usage_example
  $plog.debug("This is a message with level DEBUG")
  $plog.info("This is a message with level INFO")
  $plog.warn("This is a message with level WARN")
  $plog.error("This is a message with level ERROR")
  $plog.fatal("This is a message with level FATAL")
end
51
+
52
+ LICENSE_WEBSITE_URL = 'http://localhost:3000'
File without changes
@@ -0,0 +1,30 @@
1
+ ##
2
+ # Statistic
3
+
4
# License names in the order given by the "Statistic" header of this file
# (presumably most frequent first -- confirm with the data source).
LICENSE_SORTED_FREQUENCY = [
  'MIT', 'Apache2.0', 'BSD', 'GPL2.0', 'RubyClause-6',
  'LGPL2.1', 'GPL3.0', 'MPL 2.0', 'Python', 'LGPL3.0',
  'PublicDomain', 'EPL1.0', 'LGPL2.0', 'MPL1.1', 'CDDL1.0',
  'OpenSSL', 'GPL1.0', 'PerlArtistic', 'CDDL1.1', 'Artistic2.0',
  'TP-Free', 'Artistic1.0', 'AFL2.1', 'CPL1.0', 'Apache1.0'
]
@@ -0,0 +1,247 @@
1
+ # TODO: move to units.rb
2
# Guesses which known license a text is by comparing its word sequence with
# the template files found in a local directory.
class License_recognition
  # @param path [String] default directory holding the license templates
  def initialize(path = '')
    @license_text = ''
    @local_license_list = []
    @local_license_path = path
    # TODO: First step, choose the 3 most likely license texts by keywords; second step, loop matching all texts
    # TODO: Find keywords of license text
    @sorted_frequency = ['MIT', 'MIT2.0', 'Apache2.0', 'RubyClause-6', 'BSD',
                         'GPL2.0', 'GPL3.0', 'LGPL2.1', 'LGPL3.0'] # Often used license names
    @license_extension = '.txt' # Local license file extension
    @similar_list = []
    @overload = 20000 # Texts with more words than this are not analysed
    @condition = 0.85 # Similarity above which a template is accepted immediately
    @fallback_condition = 0.45 # Minimum similarity for the best candidate when none was accepted outright
  end

  # Returns the smallest of three values.
  def min(a, b, c)
    [a, b, c].min
  end

  # Returns the largest of three values.
  def max(a, b, c)
    [a, b, c].max
  end

  # Edit distance between two sequences (insert, delete and substitute each
  # cost 1), computed with two rolling rows.
  #
  # @param a [String, Array] first sequence
  # @param b [String, Array] second sequence
  # @return [Integer] the distance; the other sequence's size when one is empty
  def edit_distance(a, b)
    previous = (0..a.size).to_a
    (1..b.size).each do |i|
      current = [i]
      (1..a.size).each do |j|
        current[j] = if b[i - 1] == a[j - 1]
                       previous[j - 1]
                     else
                       min(current[j - 1], previous[j - 1], previous[j]) + 1
                     end
      end
      previous = current
    end
    previous[a.size]
  end

  # Length of the longest common *subsequence* of two sequences. The method
  # keeps its historical name, but matching elements need not be contiguous.
  #
  # @param a [String, Array] first sequence
  # @param b [String, Array] second sequence
  # @return [Integer] the length; 0 when either sequence is empty
  def longest_common_substring(a, b)
    previous = Array.new(a.size + 1, 0)
    (1..b.size).each do |i|
      current = [0]
      (1..a.size).each do |j|
        current[j] = if b[i - 1] == a[j - 1]
                       previous[j - 1] + 1
                     else
                       max(current[j - 1], previous[j - 1], previous[j])
                     end
      end
      previous = current
    end
    previous[a.size]
  end

  # Reorders +change+ in place so that entries whose name appears in
  # +constant+ come first, in +constant+'s order.
  #
  # @param constant [Array<String>] preferred license names, most common first
  # @param change [Array<Array>] [path, name] pairs to reorder
  # @return [Array<Array>] +change+
  def sequence(constant = @sorted_frequency, change = @local_license_list)
    front = 0
    constant.each do |name|
      found = (front...change.size).find { |k| change[k][1] == name }
      next if found.nil?

      change[front], change[found] = change[found], change[front]
      front += 1
    end
    change
  end

  # Scans +path+ for license template files and rebuilds the internal list.
  # Each call starts from an empty list, so repeated calls do not pile up
  # duplicates. Sub-directories are skipped even if their name carries the
  # license extension.
  #
  # @param path [String] directory holding the templates
  # @return [Array<Array>] [absolute path, name without extension] pairs,
  #   commonly used licenses first
  # @raise [RuntimeError] when +path+ is not a directory
  def get_local_license(path = @local_license_path)
    raise("path: #{path} not found!") unless File.directory?(path)

    @local_license_list = []
    Dir.foreach(path) do |file|
      next if file == '.' || file == '..'
      next unless File.extname(file) == @license_extension

      full_path = File.expand_path(File.join(path, file))
      # Test the entry inside +path+, not a same-named entry of the cwd.
      next if File.directory?(full_path)

      @local_license_list << [full_path, File.basename(file, @license_extension)]
    end
    sequence
    @local_license_list
  end

  # Inserts +data+ into the candidate list, which is kept sorted by
  # descending similarity (data[0]); ties keep their insertion order.
  def sort_insert(data)
    position = @similar_list.index { |entry| data[0] > entry[0] }
    if position
      @similar_list.insert(position, data)
    else
      @similar_list << data
    end
  end

  # Finds the template that best matches +packge_license+.
  #
  # Similarity is lcs / (edit distance + lcs) over the word sequences:
  # 0 means nothing in common, 1 means identical. The first template scoring
  # above @condition wins immediately; otherwise the best candidate is
  # returned if it scores above @fallback_condition.
  #
  # @param packge_license [String] unrecognized license text
  # @param path [String] directory holding the local license templates
  # @return [String, nil] the matching license name, or nil when the text is
  #   too long or nothing is similar enough
  # @raise [RuntimeError] when +path+ is not a directory
  def similarity(packge_license, path)
    get_local_license(path)
    # Drop candidates left over from an earlier call on this instance.
    @similar_list = []

    package_words = packge_license.scan(/\w+/)
    # Text is too long, unable to identify
    return nil if package_words.size > @overload

    @local_license_list.each do |license_path, license_name|
      local_words = File.read(license_path).scan(/\w+/)
      ed = edit_distance(package_words, local_words)
      lcs = longest_common_substring(package_words, local_words)
      total = ed + lcs
      # Two empty texts would otherwise divide zero by zero.
      similar = total.zero? ? 0.0 : lcs.to_f / total
      sort_insert([similar, license_name, "ed[#{ed}]", "lcs[#{lcs}]",
                   "web[#{package_words.size}]", "local[#{local_words.size}]"])

      return license_name if similar > @condition
    end

    best = @similar_list.first
    return nil if best.nil?

    best[0] > @fallback_condition ? best[1] : nil
  end

  # Unfinished: only selects the heading patterns that would delimit a license
  # section in an .rdoc or .md readme; no text is extracted yet.
  #
  # @param readme [Hash] must provide the file name under 'name'
  # @return [Regexp, nil] the section-end pattern for .rdoc/.md files (a
  #   by-product of the last assignment), nil for any other extension
  def extract_license_text_from_readme(readme)
    case File.extname(readme['name'])
    when '.rdoc'
      regular_start = /^==[ *](copying|copy|license){1}:*/i
      regular_end = /^== /
    when '.md'
      regular_start = /^##[ *](copying|copy|license){1}:*/i
      regular_end = /^## /
    else
      return nil
    end
  end
end # class License_recognition
191
+
192
+
193
# Manual smoke test: only runs when this file is executed directly. Prints the
# license name recognised for the sample text below.
if $PROGRAM_NAME == __FILE__
  license_text = 'Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.'
  recognised = License_recognition.new.similarity(license_text, "./Package_license")
  puts recognised
end
215
+
216
+
217
+
218
+
219
+
220
+
221
+
222
+
223
+
224
+
225
+
226
+
227
+
228
+
229
+
230
+
231
+
232
+
233
+
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+