text-analysis-utils 0.3 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/cache-document +1 -1
- data/bin/classify-new-words +2 -3
- data/bin/find-examples-for +1 -1
- data/bin/frequency-list +12 -6
- data/bin/percentage-known-of +1 -6
- data/bin/play-with-examples +5 -3
- data/bin/proximity-of-words +1 -2
- data/bin/vocabulary-size +6 -0
- data/lib/document-cache.rb +6 -21
- data/lib/game.rb +4 -1
- data/lib/tau_config.rb +23 -0
- data/lib/text-analysis-utils.rb +4 -4
- data/lib/vocabulary-chest.rb +6 -23
- metadata +92 -117
- data/bin/lookup +0 -9
- data/bin/play-with-blanks +0 -28
- data/bin/prepare-text +0 -9
- data/bin/vocabulary-coverage +0 -16
- data/lib/lookup.rb +0 -15
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7f8baa8dddefcdf0e0cb1897a714eef20cecdc9
|
4
|
+
data.tar.gz: 76e10901cb1e270ca4f1f95d5cee3425b06a1656
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 616397557e53efe884f01f59900221fb93a97dba1ff0ab0769882d1b7bc5b3d0f6482896d9045ce2986a53321e3b832726c2703ebd107ce39d6d93fa1cfa85b2
|
7
|
+
data.tar.gz: 739aa1e163b08bd5869a1b29887daac6d7334fdcdcd122062b3a7d57c8fb10679fcf9ec208e9be1075834f331e40637bee81121fc7efc2c93f5f2c911e602f1d
|
data/bin/cache-document
CHANGED
data/bin/classify-new-words
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'colorize'
|
5
|
-
|
6
|
-
|
5
|
+
require_relative '../lib/vocabulary-chest'
|
6
|
+
require_relative '../lib/document-cache'
|
7
7
|
|
8
8
|
|
9
9
|
def get_text
|
@@ -20,7 +20,6 @@ end
|
|
20
20
|
|
21
21
|
def ask word, index, words, text
|
22
22
|
location = (text =~ /\b#{Regexp.escape(word)}\b/)
|
23
|
-
puts "!!!!" if word == "notwendig"
|
24
23
|
location = text.index(word) if location.nil?
|
25
24
|
(puts "Skipping word: #{word}"; return 'skip') if location.nil?
|
26
25
|
|
data/bin/find-examples-for
CHANGED
data/bin/frequency-list
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# Call with a file to list words by the frequency of their stems
|
3
|
+
# Call with no arguments to list the frequencies of the words in the vocabulary chest.
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
+
require_relative '../lib/document-cache'
|
6
|
+
require_relative '../lib/vocabulary-chest'
|
5
7
|
|
6
|
-
|
8
|
+
def frequencies text=nil
|
9
|
+
text = DocumentCache::documents.inject(""){|text, f| text+= File.open(f){|f|f.read}; text} if text.nil?
|
7
10
|
|
8
|
-
frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
|
11
|
+
frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
|
9
12
|
|
10
|
-
frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
|
13
|
+
frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
|
14
|
+
end
|
11
15
|
|
12
16
|
def output frequencies
|
13
17
|
STDOUT.sync = true
|
@@ -16,6 +20,8 @@ end
|
|
16
20
|
|
17
21
|
if ARGV[0] == "--unknown"
|
18
22
|
output frequencies.find_all{|k,v| !VocabularyChest::is_known?(v[0])}
|
19
|
-
|
23
|
+
elsif ARGV.empty?
|
20
24
|
output frequencies
|
25
|
+
else
|
26
|
+
output(frequencies(File.read(ARGV[0])))
|
21
27
|
end
|
data/bin/percentage-known-of
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
require File.join(File.dirname(__FILE__), '../lib/lookup' )
|
3
|
+
require_relative '../lib/vocabulary-chest'
|
5
4
|
|
6
5
|
def analyse text
|
7
6
|
words = text.split(" ")
|
@@ -18,10 +17,6 @@ def output options
|
|
18
17
|
puts "--"
|
19
18
|
puts "UNKNOWN WORDS: #{unknown.join(", ")}"
|
20
19
|
puts
|
21
|
-
puts "DEFINITIONS"
|
22
|
-
puts Lookup::go(unknown)
|
23
|
-
puts "--"
|
24
|
-
puts
|
25
20
|
puts "Total number of unknown words: #{unknown.size}"
|
26
21
|
puts "Total number of known words: #{known.size}"
|
27
22
|
puts "Total number of words: #{size}"
|
data/bin/play-with-examples
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require_relative '../lib/document-cache'
|
4
|
+
require_relative '../lib/vocabulary-chest'
|
5
|
+
require_relative '../lib/game'
|
6
6
|
|
7
7
|
def get_input
|
8
8
|
if !ARGV.empty?
|
@@ -17,6 +17,8 @@ words = input.split("\n")
|
|
17
17
|
words.reject!{|w| STDOUT.write("."); STDOUT.flush; DocumentCache.find_examples_for(w).empty?}
|
18
18
|
puts
|
19
19
|
|
20
|
+
puts "Playing with #{words.size} words."
|
21
|
+
|
20
22
|
Game.new(words).play{ |word|
|
21
23
|
matches = DocumentCache.find_examples_for(word, 10).keys
|
22
24
|
sentence = matches.sort{|a, b| a.size <=> b.size}.first
|
data/bin/proximity-of-words
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'rubygems'
|
4
3
|
require 'amatch'
|
5
4
|
|
6
5
|
def distance w1, w2
|
@@ -29,7 +28,7 @@ def words_of text
|
|
29
28
|
end
|
30
29
|
|
31
30
|
if ARGV.size < 2
|
32
|
-
puts "usage:
|
31
|
+
puts "usage: #{$0} file_with_new_words file_with_known_words"
|
33
32
|
exit 1
|
34
33
|
end
|
35
34
|
|
data/bin/vocabulary-size
ADDED
data/lib/document-cache.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
1
|
require 'fileutils.rb'
|
2
|
-
require 'rubygems'
|
3
2
|
require 'uuid'
|
4
3
|
|
5
|
-
|
6
|
-
|
7
|
-
CACHE_DIR = "#{ROOT_DIR}/docs"
|
8
|
-
|
9
|
-
FileUtils::mkdir_p(ROOT_DIR)
|
10
|
-
FileUtils::mkdir_p(CACHE_DIR)
|
4
|
+
require_relative 'tau_config'
|
5
|
+
require_relative 'vocabulary-chest'
|
11
6
|
|
12
7
|
module DocumentCache
|
13
|
-
def self.add
|
14
|
-
filename = "#{
|
15
|
-
File.open(filename,'w'){|f| f.write(
|
8
|
+
def self.add document
|
9
|
+
filename = "#{TAUConfig::cache_dir}/#{UUID.new.generate}"
|
10
|
+
File.open(filename,'w'){|f| f.write(document)}
|
16
11
|
end
|
17
12
|
|
18
13
|
def self.find_matches_by_stemming search, sentences
|
@@ -52,7 +47,7 @@ module DocumentCache
|
|
52
47
|
end
|
53
48
|
|
54
49
|
def self.documents
|
55
|
-
Dir["#{
|
50
|
+
Dir["#{TAUConfig::cache_dir}/*"]
|
56
51
|
end
|
57
52
|
|
58
53
|
def self.find_examples_for search, count=1
|
@@ -86,13 +81,3 @@ module DocumentCache
|
|
86
81
|
counts.sort_by {|k,v| v}.reverse
|
87
82
|
end
|
88
83
|
end
|
89
|
-
|
90
|
-
if __FILE__ == $0
|
91
|
-
puts "The document cache contains #{DocumentCache.documents.size} documents."
|
92
|
-
puts
|
93
|
-
puts "Here are the 10 most frequent stems:"
|
94
|
-
DocumentCache.stemmed_frequency_list[0,10].each{|stem, count| puts "#{count} #{stem}"}
|
95
|
-
puts
|
96
|
-
puts "Here are the 10 most frequent words:"
|
97
|
-
DocumentCache.frequency_list[0,10].each{|word, count| puts "#{count} #{word}"}
|
98
|
-
end
|
data/lib/game.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'amatch'
|
3
5
|
require 'colorize'
|
@@ -103,6 +105,8 @@ class Game
|
|
103
105
|
end
|
104
106
|
|
105
107
|
def play &block
|
108
|
+
(puts "Could not find any words to play with."; exit 1) if @words.empty?
|
109
|
+
|
106
110
|
@words.shuffle.each{|word|
|
107
111
|
@turn += 1
|
108
112
|
|
@@ -119,4 +123,3 @@ class Game
|
|
119
123
|
play(&block)
|
120
124
|
end
|
121
125
|
end
|
122
|
-
|
data/lib/tau_config.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'fileutils.rb'
|
2
|
+
|
3
|
+
module TAUConfig
|
4
|
+
def self.root_dir
|
5
|
+
File.expand_path(ENV['vocabulary_chest_location'] || "~/.vocabulary-chest")
|
6
|
+
end
|
7
|
+
def self.known_file
|
8
|
+
"#{root_dir}/known"
|
9
|
+
end
|
10
|
+
def self.unknown_file
|
11
|
+
"#{root_dir}/unknown"
|
12
|
+
end
|
13
|
+
def self.cache_dir
|
14
|
+
"#{root_dir}/docs"
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
FileUtils::mkdir_p TAUConfig.root_dir
|
20
|
+
FileUtils::touch TAUConfig.known_file
|
21
|
+
FileUtils::touch TAUConfig.unknown_file
|
22
|
+
FileUtils::mkdir_p TAUConfig.root_dir
|
23
|
+
FileUtils::mkdir_p TAUConfig.cache_dir
|
data/lib/text-analysis-utils.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
require_relative 'config'
|
2
|
+
require_relative 'vocabulary-chest'
|
3
|
+
require_relative 'document-cache'
|
4
|
+
require_relative 'game'
|
data/lib/vocabulary-chest.rb
CHANGED
@@ -3,30 +3,19 @@ require 'fileutils.rb'
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'lingua/stemmer'
|
5
5
|
|
6
|
-
|
7
|
-
KNOWN_FILE = "#{ROOT_DIR}/known"
|
8
|
-
UNKNOWN_FILE = "#{ROOT_DIR}/unknown"
|
9
|
-
|
10
|
-
FileUtils::mkdir_p(ROOT_DIR)
|
11
|
-
FileUtils.touch(KNOWN_FILE)
|
12
|
-
FileUtils.touch(UNKNOWN_FILE)
|
6
|
+
require_relative 'tau_config'
|
13
7
|
|
14
8
|
module VocabularyChest
|
15
|
-
@known_file = File.open(
|
16
|
-
@unknown_file = File.open(
|
17
|
-
@
|
18
|
-
@unknown_words = nil
|
19
|
-
@stemmer= Lingua::Stemmer.new(:language => "de")
|
20
|
-
|
21
|
-
at_exit {@known_file.close}
|
22
|
-
at_exit {@unknown_file.close}
|
9
|
+
@known_file = File.open(TAUConfig.known_file,'a')
|
10
|
+
@unknown_file = File.open(TAUConfig.unknown_file,'a')
|
11
|
+
@stemmer= Lingua::Stemmer.new(:language => ENV['vocabulary_chest_language'] || "en")
|
23
12
|
|
24
13
|
def self.known_words
|
25
|
-
|
14
|
+
File.open(@known_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
|
26
15
|
end
|
27
16
|
|
28
17
|
def self.unknown_words
|
29
|
-
|
18
|
+
File.open(@unknown_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
|
30
19
|
end
|
31
20
|
|
32
21
|
def self.add_to_known_words word
|
@@ -56,9 +45,3 @@ module VocabularyChest
|
|
56
45
|
word.gsub(/[,\"\.:;()?!„“]/,"")
|
57
46
|
end
|
58
47
|
end
|
59
|
-
|
60
|
-
if __FILE__ == $0
|
61
|
-
known = VocabularyChest::known_words
|
62
|
-
unknown = VocabularyChest::unknown_words
|
63
|
-
puts "The chest contains #{known.size} known words."
|
64
|
-
end
|
metadata
CHANGED
@@ -1,148 +1,123 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: text-analysis-utils
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
version: "0.3"
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
12
|
-
-
|
6
|
+
authors:
|
7
|
+
- '@matstc'
|
13
8
|
autorequire:
|
14
9
|
bindir: bin
|
15
10
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2014-02-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
20
14
|
name: colorize
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
hash: 3
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
31
20
|
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: amatch
|
35
21
|
prerelease: false
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
45
34
|
type: :runtime
|
46
|
-
version_requirements: *id002
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: ruby-stemmer
|
49
35
|
prerelease: false
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby-stemmer
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
59
48
|
type: :runtime
|
60
|
-
version_requirements: *id003
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: uuid
|
63
49
|
prerelease: false
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: uuid
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
73
62
|
type: :runtime
|
74
|
-
|
75
|
-
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Utilities to help language learners
|
76
70
|
email:
|
77
|
-
executables:
|
78
|
-
- cache-document
|
79
|
-
- classify-new-words
|
80
|
-
- find-examples-for
|
81
|
-
- frequency-list
|
82
|
-
- lookup
|
83
|
-
- percentage-known-of
|
84
|
-
- play-with-blanks
|
85
|
-
- play-with-examples
|
86
|
-
- prepare-text
|
71
|
+
executables:
|
87
72
|
- proximity-of-words
|
73
|
+
- percentage-known-of
|
88
74
|
- readability-of
|
89
|
-
- vocabulary-
|
75
|
+
- vocabulary-size
|
76
|
+
- classify-new-words
|
77
|
+
- play-with-examples
|
78
|
+
- cache-document
|
79
|
+
- frequency-list
|
80
|
+
- find-examples-for
|
90
81
|
extensions: []
|
91
|
-
|
92
82
|
extra_rdoc_files: []
|
93
|
-
|
94
|
-
|
95
|
-
- lib/text-analysis-utils.rb
|
83
|
+
files:
|
84
|
+
- lib/game.rb
|
96
85
|
- lib/document-cache.rb
|
86
|
+
- lib/tau_config.rb
|
87
|
+
- lib/text-analysis-utils.rb
|
97
88
|
- lib/vocabulary-chest.rb
|
98
|
-
- lib/game.rb
|
99
|
-
- lib/lookup.rb
|
100
|
-
- bin/cache-document
|
101
|
-
- bin/classify-new-words
|
102
|
-
- bin/find-examples-for
|
103
|
-
- bin/frequency-list
|
104
|
-
- bin/lookup
|
105
|
-
- bin/percentage-known-of
|
106
|
-
- bin/play-with-blanks
|
107
|
-
- bin/play-with-examples
|
108
|
-
- bin/prepare-text
|
109
89
|
- bin/proximity-of-words
|
90
|
+
- bin/percentage-known-of
|
110
91
|
- bin/readability-of
|
111
|
-
- bin/vocabulary-
|
92
|
+
- bin/vocabulary-size
|
93
|
+
- bin/classify-new-words
|
94
|
+
- bin/play-with-examples
|
95
|
+
- bin/cache-document
|
96
|
+
- bin/frequency-list
|
97
|
+
- bin/find-examples-for
|
112
98
|
homepage: http://github.com/matstc/text-analysis-utils
|
113
|
-
licenses:
|
114
|
-
|
99
|
+
licenses:
|
100
|
+
- CC-BY-NC-SA 4.0
|
101
|
+
metadata: {}
|
115
102
|
post_install_message:
|
116
103
|
rdoc_options: []
|
117
|
-
|
118
|
-
require_paths:
|
104
|
+
require_paths:
|
119
105
|
- lib
|
120
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
version: 1.8.7
|
131
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
-
none: false
|
133
|
-
requirements:
|
134
|
-
- - ">="
|
135
|
-
- !ruby/object:Gem::Version
|
136
|
-
hash: 3
|
137
|
-
segments:
|
138
|
-
- 0
|
139
|
-
version: "0"
|
106
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 2.0.0
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - '>='
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
140
116
|
requirements: []
|
141
|
-
|
142
117
|
rubyforge_project:
|
143
|
-
rubygems_version: 1.
|
118
|
+
rubygems_version: 2.1.11
|
144
119
|
signing_key:
|
145
|
-
specification_version:
|
120
|
+
specification_version: 4
|
146
121
|
summary: Utilities to help language learners
|
147
122
|
test_files: []
|
148
|
-
|
123
|
+
has_rdoc:
|
data/bin/lookup
DELETED
data/bin/play-with-blanks
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require File.join(File.dirname(__FILE__), '../lib/game' )
|
4
|
-
|
5
|
-
def get_input
|
6
|
-
if !ARGV.empty?
|
7
|
-
else
|
8
|
-
STDIN.read
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
(puts "Usage: #{$0} <file with words to practice> <file with examples>"; exit(1)) if ARGV.size < 2
|
13
|
-
|
14
|
-
input = File.open(ARGV.shift){|f| f.read}
|
15
|
-
words = input.split("\n").uniq
|
16
|
-
|
17
|
-
example_sentences = []
|
18
|
-
ARGV.each{|filename| example_sentences += File.open(filename).readlines}
|
19
|
-
example_sentences.map!{|s| s.chomp}
|
20
|
-
example_sentences.reject!{|s| words.find{|w| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
|
21
|
-
words.reject!{|w| example_sentences.find{|s| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
|
22
|
-
|
23
|
-
puts "Playing with #{example_sentences.size} sentences and #{words.size} words."
|
24
|
-
|
25
|
-
Game.new(words).play{|word|
|
26
|
-
sentence = example_sentences.shuffle.find{|s| (s =~ /\b#{Regexp.escape(word)}\b/i) != nil}
|
27
|
-
[sentence, $&]
|
28
|
-
}
|
data/bin/prepare-text
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
text = ARGV.empty? ? STDIN.read : ARGV.map{|a|File.open(a){|f|f.read}}.join("\n\n")
|
4
|
-
File.open("/tmp/prepared-text", 'w'){|f| f.write(text)}
|
5
|
-
exec("classify-new-words /tmp/prepared-text && \
|
6
|
-
cache-document /tmp/prepared-text && \
|
7
|
-
echo '\nREADABILITY STATISTICS' && \
|
8
|
-
readability-of /tmp/prepared-text && \
|
9
|
-
percentage-known-of /tmp/prepared-text")
|
data/bin/vocabulary-coverage
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
command =<<EOF
|
4
|
-
|
5
|
-
total_occurrences=`expr $(frequency-list | awk '{print $1}' | xargs | sed 's/ / + /g')`
|
6
|
-
unknown_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | xargs | sed 's/ / + /g')`
|
7
|
-
next_500_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | head -500 | xargs | sed 's/ / + /g')`
|
8
|
-
|
9
|
-
echo Total occurrences: $total_occurrences
|
10
|
-
echo Unknown occurrences: $unknown_occurrences
|
11
|
-
echo Your current vocabulary knowledge covers $(echo "scale=2;($total_occurrences - $unknown_occurrences) / $total_occurrences * 100" | bc -q)% of all occurrences
|
12
|
-
echo The next 500 words will bring your cover to $(echo "scale=2;($total_occurrences - $unknown_occurrences + $next_500_occurrences) / $total_occurrences * 100" | bc -q)%
|
13
|
-
|
14
|
-
EOF
|
15
|
-
|
16
|
-
system command
|
data/lib/lookup.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
module Lookup
|
2
|
-
|
3
|
-
def self.fetch_definition word
|
4
|
-
definitions = `dict "#{word}" 2>/dev/null | grep ' ' | head -2`.chomp.gsub(" ","").split(/[\r\n]/)
|
5
|
-
definitions.uniq.join(" -- ")
|
6
|
-
end
|
7
|
-
|
8
|
-
def self.sanitize word
|
9
|
-
word.gsub(/[,\.]/,"")
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.go words
|
13
|
-
words.map{|w| sanitize w}.map{|w| "#{w}\t#{fetch_definition w}"}.join("\n")
|
14
|
-
end
|
15
|
-
end
|