text-analysis-utils 0.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/cache-document +1 -1
- data/bin/classify-new-words +2 -3
- data/bin/find-examples-for +1 -1
- data/bin/frequency-list +12 -6
- data/bin/percentage-known-of +1 -6
- data/bin/play-with-examples +5 -3
- data/bin/proximity-of-words +1 -2
- data/bin/vocabulary-size +6 -0
- data/lib/document-cache.rb +6 -21
- data/lib/game.rb +4 -1
- data/lib/tau_config.rb +23 -0
- data/lib/text-analysis-utils.rb +4 -4
- data/lib/vocabulary-chest.rb +6 -23
- metadata +92 -117
- data/bin/lookup +0 -9
- data/bin/play-with-blanks +0 -28
- data/bin/prepare-text +0 -9
- data/bin/vocabulary-coverage +0 -16
- data/lib/lookup.rb +0 -15
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7f8baa8dddefcdf0e0cb1897a714eef20cecdc9
|
4
|
+
data.tar.gz: 76e10901cb1e270ca4f1f95d5cee3425b06a1656
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 616397557e53efe884f01f59900221fb93a97dba1ff0ab0769882d1b7bc5b3d0f6482896d9045ce2986a53321e3b832726c2703ebd107ce39d6d93fa1cfa85b2
|
7
|
+
data.tar.gz: 739aa1e163b08bd5869a1b29887daac6d7334fdcdcd122062b3a7d57c8fb10679fcf9ec208e9be1075834f331e40637bee81121fc7efc2c93f5f2c911e602f1d
|
data/bin/cache-document
CHANGED
data/bin/classify-new-words
CHANGED
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'colorize'
|
5
|
-
|
6
|
-
|
5
|
+
require_relative '../lib/vocabulary-chest'
|
6
|
+
require_relative '../lib/document-cache'
|
7
7
|
|
8
8
|
|
9
9
|
def get_text
|
@@ -20,7 +20,6 @@ end
|
|
20
20
|
|
21
21
|
def ask word, index, words, text
|
22
22
|
location = (text =~ /\b#{Regexp.escape(word)}\b/)
|
23
|
-
puts "!!!!" if word == "notwendig"
|
24
23
|
location = text.index(word) if location.nil?
|
25
24
|
(puts "Skipping word: #{word}"; return 'skip') if location.nil?
|
26
25
|
|
data/bin/find-examples-for
CHANGED
data/bin/frequency-list
CHANGED
@@ -1,13 +1,17 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
|
+
# Call with a file to list words by the frequency of their stems
|
3
|
+
# Call with no arguments to list the frequencies of the words in the vocabulary chest.
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
+
require_relative '../lib/document-cache'
|
6
|
+
require_relative '../lib/vocabulary-chest'
|
5
7
|
|
6
|
-
|
8
|
+
def frequencies text=nil
|
9
|
+
text = DocumentCache::documents.inject(""){|text, f| text+= File.open(f){|f|f.read}; text} if text.nil?
|
7
10
|
|
8
|
-
frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
|
11
|
+
frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
|
9
12
|
|
10
|
-
frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
|
13
|
+
frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
|
14
|
+
end
|
11
15
|
|
12
16
|
def output frequencies
|
13
17
|
STDOUT.sync = true
|
@@ -16,6 +20,8 @@ end
|
|
16
20
|
|
17
21
|
if ARGV[0] == "--unknown"
|
18
22
|
output frequencies.find_all{|k,v| !VocabularyChest::is_known?(v[0])}
|
19
|
-
|
23
|
+
elsif ARGV.empty?
|
20
24
|
output frequencies
|
25
|
+
else
|
26
|
+
output(frequencies(File.read(ARGV[0])))
|
21
27
|
end
|
data/bin/percentage-known-of
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
require File.join(File.dirname(__FILE__), '../lib/lookup' )
|
3
|
+
require_relative '../lib/vocabulary-chest'
|
5
4
|
|
6
5
|
def analyse text
|
7
6
|
words = text.split(" ")
|
@@ -18,10 +17,6 @@ def output options
|
|
18
17
|
puts "--"
|
19
18
|
puts "UNKNOWN WORDS: #{unknown.join(", ")}"
|
20
19
|
puts
|
21
|
-
puts "DEFINITIONS"
|
22
|
-
puts Lookup::go(unknown)
|
23
|
-
puts "--"
|
24
|
-
puts
|
25
20
|
puts "Total number of unknown words: #{unknown.size}"
|
26
21
|
puts "Total number of known words: #{known.size}"
|
27
22
|
puts "Total number of words: #{size}"
|
data/bin/play-with-examples
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
require_relative '../lib/document-cache'
|
4
|
+
require_relative '../lib/vocabulary-chest'
|
5
|
+
require_relative '../lib/game'
|
6
6
|
|
7
7
|
def get_input
|
8
8
|
if !ARGV.empty?
|
@@ -17,6 +17,8 @@ words = input.split("\n")
|
|
17
17
|
words.reject!{|w| STDOUT.write("."); STDOUT.flush; DocumentCache.find_examples_for(w).empty?}
|
18
18
|
puts
|
19
19
|
|
20
|
+
puts "Playing with #{words.size} words."
|
21
|
+
|
20
22
|
Game.new(words).play{ |word|
|
21
23
|
matches = DocumentCache.find_examples_for(word, 10).keys
|
22
24
|
sentence = matches.sort{|a, b| a.size <=> b.size}.first
|
data/bin/proximity-of-words
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require 'rubygems'
|
4
3
|
require 'amatch'
|
5
4
|
|
6
5
|
def distance w1, w2
|
@@ -29,7 +28,7 @@ def words_of text
|
|
29
28
|
end
|
30
29
|
|
31
30
|
if ARGV.size < 2
|
32
|
-
puts "usage:
|
31
|
+
puts "usage: #{$0} file_with_new_words file_with_known_words"
|
33
32
|
exit 1
|
34
33
|
end
|
35
34
|
|
data/bin/vocabulary-size
ADDED
data/lib/document-cache.rb
CHANGED
@@ -1,18 +1,13 @@
|
|
1
1
|
require 'fileutils.rb'
|
2
|
-
require 'rubygems'
|
3
2
|
require 'uuid'
|
4
3
|
|
5
|
-
|
6
|
-
|
7
|
-
CACHE_DIR = "#{ROOT_DIR}/docs"
|
8
|
-
|
9
|
-
FileUtils::mkdir_p(ROOT_DIR)
|
10
|
-
FileUtils::mkdir_p(CACHE_DIR)
|
4
|
+
require_relative 'tau_config'
|
5
|
+
require_relative 'vocabulary-chest'
|
11
6
|
|
12
7
|
module DocumentCache
|
13
|
-
def self.add
|
14
|
-
filename = "#{CACHE_DIR}/#{UUID.new.generate}"
|
15
|
-
File.open(filename,'w'){|f| f.write(
|
8
|
+
def self.add document
|
9
|
+
filename = "#{TAUConfig::cache_dir}/#{UUID.new.generate}"
|
10
|
+
File.open(filename,'w'){|f| f.write(document)}
|
16
11
|
end
|
17
12
|
|
18
13
|
def self.find_matches_by_stemming search, sentences
|
@@ -52,7 +47,7 @@ module DocumentCache
|
|
52
47
|
end
|
53
48
|
|
54
49
|
def self.documents
|
55
|
-
Dir["#{CACHE_DIR}/*"]
|
50
|
+
Dir["#{TAUConfig::cache_dir}/*"]
|
56
51
|
end
|
57
52
|
|
58
53
|
def self.find_examples_for search, count=1
|
@@ -86,13 +81,3 @@ module DocumentCache
|
|
86
81
|
counts.sort_by {|k,v| v}.reverse
|
87
82
|
end
|
88
83
|
end
|
89
|
-
|
90
|
-
if __FILE__ == $0
|
91
|
-
puts "The document cache contains #{DocumentCache.documents.size} documents."
|
92
|
-
puts
|
93
|
-
puts "Here are the 10 most frequent stems:"
|
94
|
-
DocumentCache.stemmed_frequency_list[0,10].each{|stem, count| puts "#{count} #{stem}"}
|
95
|
-
puts
|
96
|
-
puts "Here are the 10 most frequent words:"
|
97
|
-
DocumentCache.frequency_list[0,10].each{|word, count| puts "#{count} #{word}"}
|
98
|
-
end
|
data/lib/game.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
#encoding: UTF-8
|
2
|
+
|
1
3
|
require 'rubygems'
|
2
4
|
require 'amatch'
|
3
5
|
require 'colorize'
|
@@ -103,6 +105,8 @@ class Game
|
|
103
105
|
end
|
104
106
|
|
105
107
|
def play &block
|
108
|
+
(puts "Could not find any words to play with."; exit 1) if @words.empty?
|
109
|
+
|
106
110
|
@words.shuffle.each{|word|
|
107
111
|
@turn += 1
|
108
112
|
|
@@ -119,4 +123,3 @@ class Game
|
|
119
123
|
play(&block)
|
120
124
|
end
|
121
125
|
end
|
122
|
-
|
data/lib/tau_config.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'fileutils.rb'
|
2
|
+
|
3
|
+
module TAUConfig
|
4
|
+
def self.root_dir
|
5
|
+
File.expand_path(ENV['vocabulary_chest_location'] || "~/.vocabulary-chest")
|
6
|
+
end
|
7
|
+
def self.known_file
|
8
|
+
"#{root_dir}/known"
|
9
|
+
end
|
10
|
+
def self.unknown_file
|
11
|
+
"#{root_dir}/unknown"
|
12
|
+
end
|
13
|
+
def self.cache_dir
|
14
|
+
"#{root_dir}/docs"
|
15
|
+
end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
FileUtils::mkdir_p TAUConfig.root_dir
|
20
|
+
FileUtils::touch TAUConfig.known_file
|
21
|
+
FileUtils::touch TAUConfig.unknown_file
|
22
|
+
FileUtils::mkdir_p TAUConfig.root_dir
|
23
|
+
FileUtils::mkdir_p TAUConfig.cache_dir
|
data/lib/text-analysis-utils.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
require_relative 'config'
|
2
|
+
require_relative 'vocabulary-chest'
|
3
|
+
require_relative 'document-cache'
|
4
|
+
require_relative 'game'
|
data/lib/vocabulary-chest.rb
CHANGED
@@ -3,30 +3,19 @@ require 'fileutils.rb'
|
|
3
3
|
require 'rubygems'
|
4
4
|
require 'lingua/stemmer'
|
5
5
|
|
6
|
-
|
7
|
-
KNOWN_FILE = "#{ROOT_DIR}/known"
|
8
|
-
UNKNOWN_FILE = "#{ROOT_DIR}/unknown"
|
9
|
-
|
10
|
-
FileUtils::mkdir_p(ROOT_DIR)
|
11
|
-
FileUtils.touch(KNOWN_FILE)
|
12
|
-
FileUtils.touch(UNKNOWN_FILE)
|
6
|
+
require_relative 'tau_config'
|
13
7
|
|
14
8
|
module VocabularyChest
|
15
|
-
@known_file = File.open(
|
16
|
-
@unknown_file = File.open(
|
17
|
-
@
|
18
|
-
@unknown_words = nil
|
19
|
-
@stemmer= Lingua::Stemmer.new(:language => "de")
|
20
|
-
|
21
|
-
at_exit {@known_file.close}
|
22
|
-
at_exit {@unknown_file.close}
|
9
|
+
@known_file = File.open(TAUConfig.known_file,'a')
|
10
|
+
@unknown_file = File.open(TAUConfig.unknown_file,'a')
|
11
|
+
@stemmer= Lingua::Stemmer.new(:language => ENV['vocabulary_chest_language'] || "en")
|
23
12
|
|
24
13
|
def self.known_words
|
25
|
-
|
14
|
+
File.open(@known_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
|
26
15
|
end
|
27
16
|
|
28
17
|
def self.unknown_words
|
29
|
-
|
18
|
+
File.open(@unknown_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
|
30
19
|
end
|
31
20
|
|
32
21
|
def self.add_to_known_words word
|
@@ -56,9 +45,3 @@ module VocabularyChest
|
|
56
45
|
word.gsub(/[,\"\.:;()?!„“]/,"")
|
57
46
|
end
|
58
47
|
end
|
59
|
-
|
60
|
-
if __FILE__ == $0
|
61
|
-
known = VocabularyChest::known_words
|
62
|
-
unknown = VocabularyChest::unknown_words
|
63
|
-
puts "The chest contains #{known.size} known words."
|
64
|
-
end
|
metadata
CHANGED
@@ -1,148 +1,123 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: text-analysis-utils
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 3
|
9
|
-
version: "0.3"
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.0
|
10
5
|
platform: ruby
|
11
|
-
authors:
|
12
|
-
-
|
6
|
+
authors:
|
7
|
+
- '@matstc'
|
13
8
|
autorequire:
|
14
9
|
bindir: bin
|
15
10
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
- !ruby/object:Gem::Dependency
|
11
|
+
date: 2014-02-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
20
14
|
name: colorize
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
hash: 3
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
31
20
|
type: :runtime
|
32
|
-
version_requirements: *id001
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: amatch
|
35
21
|
prerelease: false
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: amatch
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
45
34
|
type: :runtime
|
46
|
-
version_requirements: *id002
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: ruby-stemmer
|
49
35
|
prerelease: false
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: ruby-stemmer
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
59
48
|
type: :runtime
|
60
|
-
version_requirements: *id003
|
61
|
-
- !ruby/object:Gem::Dependency
|
62
|
-
name: uuid
|
63
49
|
prerelease: false
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: uuid
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
73
62
|
type: :runtime
|
74
|
-
|
75
|
-
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Utilities to help language learners
|
76
70
|
email:
|
77
|
-
executables:
|
78
|
-
- cache-document
|
79
|
-
- classify-new-words
|
80
|
-
- find-examples-for
|
81
|
-
- frequency-list
|
82
|
-
- lookup
|
83
|
-
- percentage-known-of
|
84
|
-
- play-with-blanks
|
85
|
-
- play-with-examples
|
86
|
-
- prepare-text
|
71
|
+
executables:
|
87
72
|
- proximity-of-words
|
73
|
+
- percentage-known-of
|
88
74
|
- readability-of
|
89
|
-
- vocabulary-coverage
|
75
|
+
- vocabulary-size
|
76
|
+
- classify-new-words
|
77
|
+
- play-with-examples
|
78
|
+
- cache-document
|
79
|
+
- frequency-list
|
80
|
+
- find-examples-for
|
90
81
|
extensions: []
|
91
|
-
|
92
82
|
extra_rdoc_files: []
|
93
|
-
|
94
|
-
|
95
|
-
- lib/text-analysis-utils.rb
|
83
|
+
files:
|
84
|
+
- lib/game.rb
|
96
85
|
- lib/document-cache.rb
|
86
|
+
- lib/tau_config.rb
|
87
|
+
- lib/text-analysis-utils.rb
|
97
88
|
- lib/vocabulary-chest.rb
|
98
|
-
- lib/game.rb
|
99
|
-
- lib/lookup.rb
|
100
|
-
- bin/cache-document
|
101
|
-
- bin/classify-new-words
|
102
|
-
- bin/find-examples-for
|
103
|
-
- bin/frequency-list
|
104
|
-
- bin/lookup
|
105
|
-
- bin/percentage-known-of
|
106
|
-
- bin/play-with-blanks
|
107
|
-
- bin/play-with-examples
|
108
|
-
- bin/prepare-text
|
109
89
|
- bin/proximity-of-words
|
90
|
+
- bin/percentage-known-of
|
110
91
|
- bin/readability-of
|
111
|
-
- bin/vocabulary-coverage
|
92
|
+
- bin/vocabulary-size
|
93
|
+
- bin/classify-new-words
|
94
|
+
- bin/play-with-examples
|
95
|
+
- bin/cache-document
|
96
|
+
- bin/frequency-list
|
97
|
+
- bin/find-examples-for
|
112
98
|
homepage: http://github.com/matstc/text-analysis-utils
|
113
|
-
licenses:
|
114
|
-
|
99
|
+
licenses:
|
100
|
+
- CC-BY-NC-SA 4.0
|
101
|
+
metadata: {}
|
115
102
|
post_install_message:
|
116
103
|
rdoc_options: []
|
117
|
-
|
118
|
-
require_paths:
|
104
|
+
require_paths:
|
119
105
|
- lib
|
120
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
version: 1.8.7
|
131
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
-
none: false
|
133
|
-
requirements:
|
134
|
-
- - ">="
|
135
|
-
- !ruby/object:Gem::Version
|
136
|
-
hash: 3
|
137
|
-
segments:
|
138
|
-
- 0
|
139
|
-
version: "0"
|
106
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - '>='
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 2.0.0
|
111
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
|
+
requirements:
|
113
|
+
- - '>='
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: '0'
|
140
116
|
requirements: []
|
141
|
-
|
142
117
|
rubyforge_project:
|
143
|
-
rubygems_version: 1.
|
118
|
+
rubygems_version: 2.1.11
|
144
119
|
signing_key:
|
145
|
-
specification_version:
|
120
|
+
specification_version: 4
|
146
121
|
summary: Utilities to help language learners
|
147
122
|
test_files: []
|
148
|
-
|
123
|
+
has_rdoc:
|
data/bin/lookup
DELETED
data/bin/play-with-blanks
DELETED
@@ -1,28 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require File.join(File.dirname(__FILE__), '../lib/game' )
|
4
|
-
|
5
|
-
def get_input
|
6
|
-
if !ARGV.empty?
|
7
|
-
else
|
8
|
-
STDIN.read
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
(puts "Usage: #{$0} <file with words to practice> <file with examples>"; exit(1)) if ARGV.size < 2
|
13
|
-
|
14
|
-
input = File.open(ARGV.shift){|f| f.read}
|
15
|
-
words = input.split("\n").uniq
|
16
|
-
|
17
|
-
example_sentences = []
|
18
|
-
ARGV.each{|filename| example_sentences += File.open(filename).readlines}
|
19
|
-
example_sentences.map!{|s| s.chomp}
|
20
|
-
example_sentences.reject!{|s| words.find{|w| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
|
21
|
-
words.reject!{|w| example_sentences.find{|s| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
|
22
|
-
|
23
|
-
puts "Playing with #{example_sentences.size} sentences and #{words.size} words."
|
24
|
-
|
25
|
-
Game.new(words).play{|word|
|
26
|
-
sentence = example_sentences.shuffle.find{|s| (s =~ /\b#{Regexp.escape(word)}\b/i) != nil}
|
27
|
-
[sentence, $&]
|
28
|
-
}
|
data/bin/prepare-text
DELETED
@@ -1,9 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
text = ARGV.empty? ? STDIN.read : ARGV.map{|a|File.open(a){|f|f.read}}.join("\n\n")
|
4
|
-
File.open("/tmp/prepared-text", 'w'){|f| f.write(text)}
|
5
|
-
exec("classify-new-words /tmp/prepared-text && \
|
6
|
-
cache-document /tmp/prepared-text && \
|
7
|
-
echo '\nREADABILITY STATISTICS' && \
|
8
|
-
readability-of /tmp/prepared-text && \
|
9
|
-
percentage-known-of /tmp/prepared-text")
|
data/bin/vocabulary-coverage
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
command =<<EOF
|
4
|
-
|
5
|
-
total_occurrences=`expr $(frequency-list | awk '{print $1}' | xargs | sed 's/ / + /g')`
|
6
|
-
unknown_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | xargs | sed 's/ / + /g')`
|
7
|
-
next_500_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | head -500 | xargs | sed 's/ / + /g')`
|
8
|
-
|
9
|
-
echo Total occurrences: $total_occurrences
|
10
|
-
echo Unknown occurrences: $unknown_occurrences
|
11
|
-
echo Your current vocabulary knowledge covers $(echo "scale=2;($total_occurrences - $unknown_occurrences) / $total_occurrences * 100" | bc -q)% of all occurrences
|
12
|
-
echo The next 500 words will bring your cover to $(echo "scale=2;($total_occurrences - $unknown_occurrences + $next_500_occurrences) / $total_occurrences * 100" | bc -q)%
|
13
|
-
|
14
|
-
EOF
|
15
|
-
|
16
|
-
system command
|
data/lib/lookup.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
module Lookup
|
2
|
-
|
3
|
-
def self.fetch_definition word
|
4
|
-
definitions = `dict "#{word}" 2>/dev/null | grep ' ' | head -2`.chomp.gsub(" ","").split(/[\r\n]/)
|
5
|
-
definitions.uniq.join(" -- ")
|
6
|
-
end
|
7
|
-
|
8
|
-
def self.sanitize word
|
9
|
-
word.gsub(/[,\.]/,"")
|
10
|
-
end
|
11
|
-
|
12
|
-
def self.go words
|
13
|
-
words.map{|w| sanitize w}.map{|w| "#{w}\t#{fetch_definition w}"}.join("\n")
|
14
|
-
end
|
15
|
-
end
|