tongue 0.2.10.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/tongue +46 -0
- data/lib/linguist.rb +6 -0
- data/lib/linguist/blob_helper.rb +333 -0
- data/lib/linguist/classifier.rb +171 -0
- data/lib/linguist/file_blob.rb +58 -0
- data/lib/linguist/generated.rb +241 -0
- data/lib/linguist/heuristics.rb +38 -0
- data/lib/linguist/language.rb +578 -0
- data/lib/linguist/languages.yml +1901 -0
- data/lib/linguist/md5.rb +38 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/samples.json +47115 -0
- data/lib/linguist/samples.rb +149 -0
- data/lib/linguist/tokenizer.rb +198 -0
- data/lib/linguist/vendor.yml +167 -0
- metadata +143 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1a1363397afe6015c6036f01dfd10d6f5e225b9d
|
4
|
+
data.tar.gz: 330d083847d913947882a2afa55cb6bed4d8109d
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8abfb4aab7feec239471bf8ba1731c1052f624dea59ec0d939b8910bc167b3b92ddb01df515e65ca7874f0ce1b9dca376cb715bfda492ab516a1385d3536bc94
|
7
|
+
data.tar.gz: d4a6dfa37d2568b6695e2ad6a90db066b8a772ccb66c0c534ec42912e7c88eb3efff331dc1b5c1008405bd9926ff647a9fa285fcb9b7fc0a37c3917647f5b24a
|
data/bin/tongue
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby

# tongue — detect language type for a file, or, given a directory, determine language breakdown
# usage: tongue <path>

require 'linguist/file_blob'
require 'linguist/repository'

# Default to the current working directory when no path is given.
path = ARGV[0] || Dir.pwd

if File.directory?(path)
  # Directory mode: print each detected language with its share of the repo,
  # largest first.
  repo = Linguist::Repository.from_directory(path)
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
    total = repo.size.to_f
    # Guard against an empty repository (total == 0) to avoid NaN output.
    percentage = total.zero? ? 0.0 : (size / total) * 100
    # NOTE: was `sprintf '%.2f' % percentage`, which formatted the number and
    # then passed the RESULT to sprintf as a format string. Format once.
    percentage = format('%.2f', percentage)
    puts "%-7s %s" % ["#{percentage}%", language]
  end
elsif File.file?(path)
  # File mode: print a summary of the blob's detected properties.
  blob = Linguist::FileBlob.new(path, Dir.pwd)
  type = if blob.text?
           'Text'
         elsif blob.image?
           'Image'
         else
           'Binary'
         end

  puts "#{blob.name}: #{blob.loc} lines (#{blob.sloc} sloc)"
  puts "  type:      #{type}"
  puts "  mime type: #{blob.mime_type}"
  puts "  language:  #{blob.language}"

  if blob.large?
    puts "  blob is too large to be shown"
  end

  if blob.generated?
    puts "  appears to be generated source code"
  end

  if blob.vendored?
    puts "  appears to be a vendored file"
  end
else
  abort "usage: tongue <path>"
end
|
data/lib/linguist/blob_helper.rb
ADDED
@@ -0,0 +1,333 @@
|
|
1
|
+
require 'linguist/generated'
|
2
|
+
require 'linguist/language'
|
3
|
+
|
4
|
+
# require 'charlock_holmes'
|
5
|
+
# require 'escape_utils'
|
6
|
+
# require 'mime/types'
|
7
|
+
require 'pygments'
|
8
|
+
require 'yaml'
|
9
|
+
|
10
|
+
module Linguist
  # DEPRECATED Avoid mixing into Blob classes. Prefer functional interfaces
  # like `Language.detect` over `Blob#language`. Functions are much easier to
  # cache and compose.
  #
  # Avoid adding additional bloat to this module.
  #
  # BlobHelper is a mixin for Blobish classes that respond to "name",
  # "data" and "size" such as Grit::Blob.
  #
  # NOTE(review): most predicates below are hard-coded stubs; the original
  # charlock_holmes / escape_utils / mime-types backed implementations are
  # preserved as comments.
  module BlobHelper
    # Public: Get the extname of the path
    #
    # Examples
    #
    #   blob(name='foo.rb').extname
    #   # => '.rb'
    #
    # Returns a String
    def extname
      File.extname(name.to_s)
    end

    # Internal: Lookup mime type for extension.
    #
    # Stubbed: always 'text/plain'. Original used ::MIME::Types.type_for,
    # preferring text mime types over binary.
    #
    # Returns a MIME::Type
    def _mime_type
      'text/plain'
    end

    # Public: Get the actual blob mime type
    #
    # Examples
    #
    #   # => 'text/plain'
    #   # => 'text/html'
    #
    # Returns a mime type String.
    def mime_type
      'text/plain'
    end

    # Internal: Is the blob binary according to its mime type
    #
    # Stubbed: always false (was `_mime_type ? _mime_type.binary? : false`).
    #
    # Return true or false
    def binary_mime_type?
      false
    end

    # Internal: Is the blob binary according to its mime type,
    # overriding it if we have better data from the languages.yml
    # database.
    #
    # Stubbed: always false
    # (was `binary_mime_type? && !Language.find_by_filename(name)`).
    #
    # Return true or false
    def likely_binary?
      false
    end

    # Public: Get the Content-Type header value
    #
    # This value is used when serving raw blobs.
    #
    # Examples
    #
    #   # => 'text/plain; charset=utf-8'
    #   # => 'application/octet-stream'
    #
    # Returns a content type String.
    def content_type
      "text/plain"
    end

    # Public: Get the Content-Disposition header value
    #
    # This value is used when serving raw blobs.
    #
    #   # => "attachment; filename=file.tar"
    #   # => "inline"
    #
    # Returns a content disposition String.
    def disposition
      if text? || image?
        'inline'
      elsif name.nil?
        "attachment"
      else
        # Original appended an escaped filename via EscapeUtils.
        'attachment'
      end
    end

    # Public: Encoding of the blob. Stubbed to UTF-8.
    def encoding
      'UTF-8'
    end

    # Try to guess the encoding
    #
    # Returns: a Hash, with :encoding, :confidence, :type
    # this will return nil if an error occurred during detection or
    # no valid encoding could be found
    #
    # Stubbed: always reports UTF-8 text at full confidence.
    def detect_encoding
      {:encoding => 'UTF-8', :confidence => 100, :type => :text}
    end

    # Public: Is the blob binary?
    #
    # Only nil data (e.g. a blob too large to load) is considered binary;
    # the charlock_holmes based detection is stubbed out.
    #
    # Return true or false
    def binary?
      data.nil?
    end

    # Public: Is the blob text?
    #
    # Return true or false
    def text?
      true
    end

    # Public: Is the blob a supported image format?
    #
    # Stubbed: always false
    # (was `['.png', '.jpg', '.jpeg', '.gif'].include?(extname.downcase)`).
    #
    # Return true or false
    def image?
      false
    end

    # Public: Is the blob a supported 3D model format?
    #
    # Stubbed: always false (was `extname.downcase == '.stl'`).
    #
    # Return true or false
    def solid?
      false
    end

    # Public: Is this blob a CSV file?
    #
    # Stubbed: always false (was `text? && extname.downcase == '.csv'`).
    #
    # Return true or false
    def csv?
      false
    end

    # Public: Is the blob a PDF?
    #
    # Stubbed: always false (was `extname.downcase == '.pdf'`).
    #
    # Return true or false
    def pdf?
      false
    end

    # MEGABYTE = 1024 * 1024

    # Public: Is the blob too big to load?
    #
    # Stubbed: always false (was `size.to_i > MEGABYTE`).
    #
    # Return true or false
    def large?
      false
    end

    # Public: Is the blob safe to colorize?
    #
    # We use Pygments for syntax highlighting blobs. Pygments
    # can be too slow for very large blobs or for certain
    # corner-case blobs.
    #
    # Stubbed: always true
    # (was `!large? && text? && !high_ratio_of_long_lines?`).
    #
    # Return true or false
    def safe_to_colorize?
      true
    end

    # Internal: Does the blob have a ratio of long lines?
    #
    # These types of files are usually going to make Pygments.rb
    # angry if we try to colorize them.
    #
    # Stubbed: always false (was `size / loc > 5000` guarded by loc > 0).
    #
    # Return true or false
    def high_ratio_of_long_lines?
      false
    end

    # Public: Is the blob viewable?
    #
    # Non-viewable blobs will just show a "View Raw" link
    #
    # Stubbed: always true (was `!large? && text?`).
    #
    # Return true or false
    def viewable?
      true
    end

    # Public: Is the blob in a vendored directory?
    #
    # Vendored files are ignored by language statistics.
    #
    # See "vendor.yml" for a list of vendored conventions that match
    # this pattern.
    #
    # Stubbed: always false (was a match against VendoredRegexp).
    #
    # Return true or false
    def vendored?
      false
    end

    # Public: Get each line of data
    #
    # Requires Blob#data
    #
    # Returns an Array of lines
    def lines
      # BUG FIX: the condition previously read `!data == ''`, which parses as
      # `(!data) == ''` and is always false, so `lines` always returned [].
      @lines ||=
        if viewable? && data && data != ''
          data.split(/\r\n|\r|\n/, -1)
        else
          []
        end
    end

    # Public: Get number of lines of code
    #
    # Requires Blob#data
    #
    # Returns Integer
    def loc
      lines.size
    end

    # Public: Get number of source lines of code
    # (lines containing at least one non-whitespace character).
    #
    # Requires Blob#data
    #
    # Returns Integer
    def sloc
      lines.grep(/\S/).size
    end

    # Public: Is the blob a generated file?
    #
    # Generated source code is suppressed in diffs and is ignored by
    # language statistics.
    #
    # Stubbed: always false
    # (was `Generated.generated?(name, lambda { data })`).
    #
    # Return true or false
    def generated?
      false
    end

    # Public: Detects the Language of the blob.
    #
    # May load Blob#data
    #
    # NOTE(review): relies on the including class defining `mode` — confirm
    # against FileBlob and other includers.
    #
    # Returns a Language or nil if none is detected
    def language
      return @language if defined? @language

      # BUG FIX: previously `!data == ''` (always false) made this branch
      # unreachable, forcing the lazy lambda path even when @data was loaded.
      if defined?(@data) && @data.is_a?(String) && !@data.empty?
        data = @data
      else
        data = lambda { self.data }
      end

      @language = Language.detect(name.to_s, data, mode)
    end

    # Internal: Get the lexer of the blob.
    #
    # Returns a Lexer.
    def lexer
      language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
    end

    # Public: Highlight syntax of blob
    #
    # options - A Hash of options (defaults to {})
    #
    # Returns html String
    def colorize(options = {})
      return unless safe_to_colorize?
      options[:options] ||= {}
      options[:options][:encoding] ||= encoding
      lexer.highlight(data, options)
    end
  end
end
|
data/lib/linguist/classifier.rb
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'linguist/tokenizer'
|
2
|
+
|
3
|
+
module Linguist
  # Language bayesian classifier.
  class Classifier
    # Public: Train classifier that data is a certain language.
    #
    # db       - Hash classifier database object
    # language - String language of data
    # data     - String contents of file
    #
    # Examples
    #
    #   Classifier.train(db, 'Ruby', "def hello; end")
    #
    # Returns nothing.
    #
    # Set LINGUIST_DEBUG=1 or =2 to see probabilities per-token or
    # per-language. See also #dump_all_tokens, below.
    def self.train!(db, language, data)
      tokens = Tokenizer.tokenize(data)

      # Lazily initialize the database counters and tables.
      db['tokens_total'] ||= 0
      db['languages_total'] ||= 0
      db['tokens'] ||= {}
      db['language_tokens'] ||= {}
      db['languages'] ||= {}

      tokens.each do |token|
        db['tokens'][language] ||= {}
        db['tokens'][language][token] ||= 0
        db['tokens'][language][token] += 1
        db['language_tokens'][language] ||= 0
        db['language_tokens'][language] += 1
        db['tokens_total'] += 1
      end
      db['languages'][language] ||= 0
      db['languages'][language] += 1
      db['languages_total'] += 1

      nil
    end

    # Public: Guess language of data.
    #
    # db        - Hash of classifier tokens database.
    # data      - Array of tokens or String data to analyze.
    # languages - Array of language name Strings to restrict to.
    #
    # Examples
    #
    #   Classifier.classify(db, "def hello; end")
    #   # => [ 'Ruby', 0.90], ['Python', 0.2], ... ]
    #
    # Returns sorted Array of result pairs. Each pair contains the
    # String language name and a Float score.
    def self.classify(db, tokens, languages = nil)
      languages ||= db['languages'].keys
      new(db).classify(tokens, languages)
    end

    # Internal: Initialize a Classifier from a trained database Hash.
    def initialize(db = {})
      @tokens_total    = db['tokens_total']
      @languages_total = db['languages_total']
      @tokens          = db['tokens']
      @language_tokens = db['language_tokens']
      @languages       = db['languages']
    end

    # Internal: Guess language of data
    #
    # data      - Array of tokens or String data to analyze.
    # languages - Array of language name Strings to restrict to.
    #
    # Returns sorted Array of result pairs. Each pair contains the
    # String language name and a Float score (log-probability; higher
    # is more likely).
    def classify(tokens, languages)
      return [] if tokens.nil?
      tokens = Tokenizer.tokenize(tokens) if tokens.is_a?(String)
      scores = {}

      debug_dump_all_tokens(tokens, languages) if verbosity >= 2

      languages.each do |language|
        debug_dump_probabilities(tokens, language) if verbosity >= 1
        scores[language] = tokens_probability(tokens, language) + language_probability(language)
      end

      # Sort pairs by score, descending. (A trailing identity `.map` was
      # removed; `Hash#sort` already yields [language, score] pairs.)
      scores.sort { |a, b| b[1] <=> a[1] }
    end

    # Internal: Probability of set of tokens in a language occurring - P(D | C)
    #
    # tokens   - Array of String tokens.
    # language - Language to check.
    #
    # Returns a Float log-probability (sum of per-token logs).
    def tokens_probability(tokens, language)
      tokens.inject(0.0) do |sum, token|
        sum + Math.log(token_probability(token, language))
      end
    end

    # Internal: Probability of token in language occurring - P(F | C)
    #
    # token    - String token.
    # language - Language to check.
    #
    # Returns Float between 0.0 and 1.0.
    def token_probability(token, language)
      # BUG FIX: guard against a language with no token table at all;
      # `@tokens[language][token]` raised NoMethodError on nil before.
      count = (@tokens[language] || {})[token].to_f
      if count == 0.0
        # Unseen token: smooth with a tiny non-zero probability so the
        # log above stays finite.
        1 / @tokens_total.to_f
      else
        count / @language_tokens[language].to_f
      end
    end

    # Internal: Probability of a language occurring - P(C)
    #
    # language - Language to check.
    #
    # Returns a Float log-probability.
    def language_probability(language)
      Math.log(@languages[language].to_f / @languages_total.to_f)
    end

    private
    # Internal: LINGUIST_DEBUG level (0 when unset).
    def verbosity
      @verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
    end

    # Internal: print the score decomposition for one language.
    #
    # BUG FIX: this previously referenced `scores[language]`, a local
    # variable of #classify that is not in scope here, raising NameError
    # whenever LINGUIST_DEBUG=1. Compute the components locally instead.
    def debug_dump_probabilities(tokens, language)
      token_score    = tokens_probability(tokens, language)
      language_score = language_probability(language)
      printf("%10s = %10.3f + %7.3f = %10.3f\n",
             language, token_score, language_score, token_score + language_score)
    end

    # Internal: show a table of probabilities for each <token,language> pair.
    #
    # The number in each table entry is the number of "points" that each
    # token contributes toward the belief that the file under test is a
    # particular language. Points are additive.
    #
    # Points are the number of times a token appears in the file, times
    # how much more likely (log of probability ratio) that token is to
    # appear in one language vs. the least-likely language. Dashes
    # indicate the least-likely language (and zero points) for each token.
    def debug_dump_all_tokens(tokens, languages)
      maxlen = tokens.map { |tok| tok.size }.max

      printf "%#{maxlen}s", ""
      puts "    #" + languages.map { |lang| sprintf("%10s", lang) }.join

      token_map = Hash.new(0)
      tokens.each { |tok| token_map[tok] += 1 }

      token_map.sort.each { |tok, count|
        arr = languages.map { |lang| [lang, token_probability(tok, lang)] }
        min = arr.map { |_, prob| prob }.min
        minlog = Math.log(min)
        if !arr.inject(true) { |result, n| result && n[1] == arr[0][1] }
          printf "%#{maxlen}s%5d", tok, count

          puts arr.map { |ent|
            ent[1] == min ? "         -" : sprintf("%10.3f", count * (Math.log(ent[1]) - minlog))
          }.join
        end
      }
    end
  end
end
|