RubyGems - text-analysis-utils - Versions diffs - 0.3 → 0.5.0 - Mend

text-analysis-utils 0.3 → 0.5.0

Files changed (20) hide show

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: d7f8baa8dddefcdf0e0cb1897a714eef20cecdc9
+  data.tar.gz: 76e10901cb1e270ca4f1f95d5cee3425b06a1656
+SHA512:
+  metadata.gz: 616397557e53efe884f01f59900221fb93a97dba1ff0ab0769882d1b7bc5b3d0f6482896d9045ce2986a53321e3b832726c2703ebd107ce39d6d93fa1cfa85b2
+  data.tar.gz: 739aa1e163b08bd5869a1b29887daac6d7334fdcdcd122062b3a7d57c8fb10679fcf9ec208e9be1075834f331e40637bee81121fc7efc2c93f5f2c911e602f1d

data/bin/cache-document CHANGED Viewed

@@ -1,6 +1,6 @@
 #!/usr/bin/env ruby
-require File.join(File.dirname(__FILE__), '../lib/document-cache' )
+require_relative '../lib/document-cache'
 def get_text
 	if ARGV.empty?

data/bin/classify-new-words CHANGED Viewed

@@ -2,8 +2,8 @@
 require 'rubygems'
 require 'colorize'
-require File.join(File.dirname(__FILE__), '../lib/vocabulary-chest' )
-require File.join(File.dirname(__FILE__), '../lib/document-cache' )
+require_relative '../lib/vocabulary-chest'
+require_relative '../lib/document-cache'
 def get_text
@@ -20,7 +20,6 @@ end
 def ask word, index, words, text
   location = (text =~ /\b#{Regexp.escape(word)}\b/)
-	puts "!!!!" if word == "notwendig"
   location = text.index(word) if location.nil?
 	(puts "Skipping word: #{word}"; return 'skip') if location.nil?

data/bin/find-examples-for CHANGED Viewed

@@ -2,7 +2,7 @@
 require 'rubygems'
 require 'colorize'
-require File.join(File.dirname(__FILE__), '../lib/document-cache' )
+require_relative '../lib/document-cache'
 count = 1
 count_param = ARGV.find{|a| (a =~ /--\d*/) == 0}

data/bin/frequency-list CHANGED Viewed

@@ -1,13 +1,17 @@
 #!/usr/bin/env ruby
+# Call with a file to list words by the frequency of their stems
+# Call with no arguments to list the frequencies of the words in the vocabulary chest.
-require File.join(File.dirname(__FILE__), '../lib/document-cache' )
-require File.join(File.dirname(__FILE__), '../lib/vocabulary-chest' )
+require_relative '../lib/document-cache'
+require_relative '../lib/vocabulary-chest'
-text = DocumentCache::documents.inject(""){|text, f| text+= File.open(f){|f|f.read}; text}
+def frequencies text=nil
+  text = DocumentCache::documents.inject(""){|text, f| text+= File.open(f){|f|f.read}; text} if text.nil?
-frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
+  frequencies = text.split(" ").inject(Hash.new {|hash,key| hash[key] = []}){|hash, w| hash[VocabularyChest::stem(w)] << w; hash }
-frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
+  frequencies = frequencies.sort{|a,b| a[1].size <=> b[1].size}.reverse
+end
 def output frequencies
   STDOUT.sync = true
@@ -16,6 +20,8 @@ end
 if ARGV[0] == "--unknown"
 	output frequencies.find_all{|k,v| !VocabularyChest::is_known?(v[0])}
-else
+elsif ARGV.empty?
 	output frequencies
+else
+  output(frequencies(File.read(ARGV[0])))
 end

data/bin/percentage-known-of CHANGED Viewed

@@ -1,7 +1,6 @@
 #!/usr/bin/env ruby
-require File.join(File.dirname(__FILE__), '../lib/vocabulary-chest' )
-require File.join(File.dirname(__FILE__), '../lib/lookup' )
+require_relative '../lib/vocabulary-chest'
 def analyse text
 	words = text.split(" ")
@@ -18,10 +17,6 @@ def output options
 	puts "--"
 	puts "UNKNOWN WORDS: #{unknown.join(", ")}"
 	puts
-	puts "DEFINITIONS"
-	puts Lookup::go(unknown)
-	puts "--"
-	puts
 	puts "Total number of unknown words: #{unknown.size}"
 	puts "Total number of known words: #{known.size}"
 	puts "Total number of words: #{size}"

data/bin/play-with-examples CHANGED Viewed

@@ -1,8 +1,8 @@
 #!/usr/bin/env ruby
-require File.join(File.dirname(__FILE__), '../lib/document-cache' )
-require File.join(File.dirname(__FILE__), '../lib/vocabulary-chest' )
-require File.join(File.dirname(__FILE__), '../lib/game' )
+require_relative '../lib/document-cache'
+require_relative '../lib/vocabulary-chest'
+require_relative '../lib/game'
 def get_input
   if !ARGV.empty?
@@ -17,6 +17,8 @@ words = input.split("\n")
 words.reject!{|w| STDOUT.write("."); STDOUT.flush; DocumentCache.find_examples_for(w).empty?}
 puts
+puts "Playing with #{words.size} words."
 Game.new(words).play{ |word|
   matches = DocumentCache.find_examples_for(word, 10).keys
   sentence = matches.sort{|a, b| a.size <=> b.size}.first

data/bin/proximity-of-words CHANGED Viewed

@@ -1,6 +1,5 @@
 #!/usr/bin/env ruby
-require 'rubygems'
 require 'amatch'
 def distance w1, w2
@@ -29,7 +28,7 @@ def words_of text
 end
 if ARGV.size < 2
-  puts "usage: ./script <new text> <known text>"
+  puts "usage: #{$0} file_with_new_words file_with_known_words"
   exit 1
 end

data/bin/vocabulary-size ADDED Viewed

@@ -0,0 +1,6 @@
+#!/usr/bin/env ruby
+require_relative '../lib/vocabulary-chest'
+size = VocabularyChest.known_words.size
+puts "You know #{size} words."

data/lib/document-cache.rb CHANGED Viewed

@@ -1,18 +1,13 @@
 require 'fileutils.rb'
-require 'rubygems'
 require 'uuid'
-require File.join(File.dirname(__FILE__), 'vocabulary-chest' )
-CACHE_DIR = "#{ROOT_DIR}/docs"
-FileUtils::mkdir_p(ROOT_DIR)
-FileUtils::mkdir_p(CACHE_DIR)
+require_relative 'tau_config'
+require_relative 'vocabulary-chest'
 module DocumentCache
-	def self.add search
-		filename = "#{CACHE_DIR}/#{UUID.new.generate}"
-		File.open(filename,'w'){|f| f.write(search)}
+	def self.add document
+		filename = "#{TAUConfig::cache_dir}/#{UUID.new.generate}"
+		File.open(filename,'w'){|f| f.write(document)}
 	end
 	def self.find_matches_by_stemming search, sentences
@@ -52,7 +47,7 @@ module DocumentCache
 	end
 	def self.documents
-		Dir["#{CACHE_DIR}/*"]
+		Dir["#{TAUConfig::cache_dir}/*"]
 	end
 	def self.find_examples_for search, count=1
@@ -86,13 +81,3 @@ module DocumentCache
 		counts.sort_by {|k,v| v}.reverse
 	end
 end
-if __FILE__ == $0
-	puts "The document cache contains #{DocumentCache.documents.size} documents."
-	puts
-	puts "Here are the 10 most frequent stems:"
-	DocumentCache.stemmed_frequency_list[0,10].each{|stem, count| puts "#{count} #{stem}"}
-	puts
-	puts "Here are the 10 most frequent words:"
-	DocumentCache.frequency_list[0,10].each{|word, count| puts "#{count} #{word}"}
-end

data/lib/game.rb CHANGED Viewed

@@ -1,3 +1,5 @@
+#encoding: UTF-8
 require 'rubygems'
 require 'amatch'
 require 'colorize'
@@ -103,6 +105,8 @@ class Game
 	end
 	def play &block
+    (puts "Could not find any words to play with."; exit 1) if @words.empty?
 		@words.shuffle.each{|word|
 			@turn += 1
@@ -119,4 +123,3 @@ class Game
 		play(&block)
 	end
 end

data/lib/tau_config.rb ADDED Viewed

@@ -0,0 +1,23 @@
+require 'fileutils.rb'
+module TAUConfig
+  def self.root_dir
+    File.expand_path(ENV['vocabulary_chest_location'] || "~/.vocabulary-chest")
+  end
+  def self.known_file
+    "#{root_dir}/known"
+  end
+  def self.unknown_file
+    "#{root_dir}/unknown"
+  end
+  def self.cache_dir
+    "#{root_dir}/docs"
+  end
+end
+FileUtils::mkdir_p TAUConfig.root_dir
+FileUtils::touch TAUConfig.known_file
+FileUtils::touch TAUConfig.unknown_file
+FileUtils::mkdir_p TAUConfig.root_dir
+FileUtils::mkdir_p TAUConfig.cache_dir

data/lib/text-analysis-utils.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require File.join(File.dirname(__FILE__), 'vocabulary-chest' )
-require File.join(File.dirname(__FILE__), 'document-cache' )
-require File.join(File.dirname(__FILE__), 'game' )
+require_relative 'config'
+require_relative 'vocabulary-chest'
+require_relative 'document-cache'
+require_relative 'game'

data/lib/vocabulary-chest.rb CHANGED Viewed

@@ -3,30 +3,19 @@ require 'fileutils.rb'
 require 'rubygems'
 require 'lingua/stemmer'
-ROOT_DIR = File.expand_path(ENV['chest_location'] || "~/.vocabulary-chest")
-KNOWN_FILE = "#{ROOT_DIR}/known"
-UNKNOWN_FILE = "#{ROOT_DIR}/unknown"
-FileUtils::mkdir_p(ROOT_DIR)
-FileUtils.touch(KNOWN_FILE)
-FileUtils.touch(UNKNOWN_FILE)
+require_relative 'tau_config'
 module VocabularyChest
-	@known_file = File.open(KNOWN_FILE,'a')
-	@unknown_file = File.open(UNKNOWN_FILE,'a')
-	@known_words = nil
-	@unknown_words = nil
-	@stemmer= Lingua::Stemmer.new(:language => "de")
-	at_exit {@known_file.close}
-	at_exit {@unknown_file.close}
+	@known_file = File.open(TAUConfig.known_file,'a')
+	@unknown_file = File.open(TAUConfig.unknown_file,'a')
+	@stemmer= Lingua::Stemmer.new(:language => ENV['vocabulary_chest_language'] || "en")
 	def self.known_words
-		@known_words ||= File.open(KNOWN_FILE,'r'){|f|f.readlines}.collect{|line| line.chomp}
+		File.open(@known_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
 	end
 	def self.unknown_words
-		@unknown_words ||= File.open(UNKNOWN_FILE,'r'){|f|f.readlines}.collect{|line| line.chomp}
+		File.open(@unknown_file,'r'){|f|f.readlines}.collect{|line| line.chomp}
 	end
 	def self.add_to_known_words word
@@ -56,9 +45,3 @@ module VocabularyChest
 		word.gsub(/[,\"\.:;()?!„“]/,"")
 	end
 end
-if __FILE__ == $0
-	known = VocabularyChest::known_words
-	unknown = VocabularyChest::unknown_words
-	puts "The chest contains #{known.size} known words."
-end

metadata CHANGED Viewed

@@ -1,148 +1,123 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: text-analysis-utils
-version: !ruby/object:Gem::Version
-  hash: 13
-  prerelease:
-  segments:
-  - 0
-  - 3
-  version: "0.3"
+version: !ruby/object:Gem::Version
+  version: 0.5.0
 platform: ruby
-authors:
-- Matt
+authors:
+- '@matstc'
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-06-23 00:00:00 Z
-dependencies:
-- !ruby/object:Gem::Dependency
+date: 2014-02-26 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
   name: colorize
-  prerelease: false
-  requirement: &id001 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id001
-- !ruby/object:Gem::Dependency
-  name: amatch
   prerelease: false
-  requirement: &id002 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: amatch
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id002
-- !ruby/object:Gem::Dependency
-  name: ruby-stemmer
   prerelease: false
-  requirement: &id003 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: ruby-stemmer
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id003
-- !ruby/object:Gem::Dependency
-  name: uuid
   prerelease: false
-  requirement: &id004 !ruby/object:Gem::Requirement
-    none: false
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        hash: 3
-        segments:
-        - 0
-        version: "0"
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: uuid
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
   type: :runtime
-  version_requirements: *id004
-description:
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Utilities to help language learners
 email:
-executables:
-- cache-document
-- classify-new-words
-- find-examples-for
-- frequency-list
-- lookup
-- percentage-known-of
-- play-with-blanks
-- play-with-examples
-- prepare-text
+executables:
 - proximity-of-words
+- percentage-known-of
 - readability-of
-- vocabulary-coverage
+- vocabulary-size
+- classify-new-words
+- play-with-examples
+- cache-document
+- frequency-list
+- find-examples-for
 extensions: []
 extra_rdoc_files: []
-files:
-- lib/text-analysis-utils.rb
+files:
+- lib/game.rb
 - lib/document-cache.rb
+- lib/tau_config.rb
+- lib/text-analysis-utils.rb
 - lib/vocabulary-chest.rb
-- lib/game.rb
-- lib/lookup.rb
-- bin/cache-document
-- bin/classify-new-words
-- bin/find-examples-for
-- bin/frequency-list
-- bin/lookup
-- bin/percentage-known-of
-- bin/play-with-blanks
-- bin/play-with-examples
-- bin/prepare-text
 - bin/proximity-of-words
+- bin/percentage-known-of
 - bin/readability-of
-- bin/vocabulary-coverage
+- bin/vocabulary-size
+- bin/classify-new-words
+- bin/play-with-examples
+- bin/cache-document
+- bin/frequency-list
+- bin/find-examples-for
 homepage: http://github.com/matstc/text-analysis-utils
-licenses: []
+licenses:
+- CC-BY-NC-SA 4.0
+metadata: {}
 post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 57
-      segments:
-      - 1
-      - 8
-      - 7
-      version: 1.8.7
-required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
-  - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: 2.0.0
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 1.8.15
+rubygems_version: 2.1.11
 signing_key:
-specification_version: 3
+specification_version: 4
 summary: Utilities to help language learners
 test_files: []
+has_rdoc:

data/bin/lookup DELETED Viewed

@@ -1,9 +0,0 @@
-#!/usr/bin/env ruby
-require File.join(File.dirname(__FILE__), '../lib/lookup' )
-if !ARGV.empty?
-  puts Lookup::go ARGV
-else
-  puts Lookup::go STDIN.read.split("\n")
-end

data/bin/play-with-blanks DELETED Viewed

@@ -1,28 +0,0 @@
-#!/usr/bin/env ruby
-require File.join(File.dirname(__FILE__), '../lib/game' )
-def get_input
-  if !ARGV.empty?
-  else
-    STDIN.read
-  end
-end
-(puts "Usage: #{$0} <file with words to practice> <file with examples>"; exit(1)) if ARGV.size < 2
-input = File.open(ARGV.shift){|f| f.read}
-words = input.split("\n").uniq
-example_sentences = []
-ARGV.each{|filename| example_sentences += File.open(filename).readlines}
-example_sentences.map!{|s| s.chomp}
-example_sentences.reject!{|s| words.find{|w| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
-words.reject!{|w| example_sentences.find{|s| (s =~ /\b#{Regexp.escape(w)}\b/i) != nil} == nil}
-puts "Playing with #{example_sentences.size} sentences and #{words.size} words."
-Game.new(words).play{|word|
-  sentence = example_sentences.shuffle.find{|s| (s =~ /\b#{Regexp.escape(word)}\b/i) != nil}
-  [sentence, $&]
-}

data/bin/prepare-text DELETED Viewed

@@ -1,9 +0,0 @@
-#!/usr/bin/env ruby
-text = ARGV.empty? ? STDIN.read : ARGV.map{|a|File.open(a){|f|f.read}}.join("\n\n")
-File.open("/tmp/prepared-text", 'w'){|f| f.write(text)}
-exec("classify-new-words /tmp/prepared-text && \
-	cache-document /tmp/prepared-text && \
-	echo '\nREADABILITY STATISTICS' && \
-	readability-of /tmp/prepared-text && \
-	percentage-known-of /tmp/prepared-text")

data/bin/vocabulary-coverage DELETED Viewed

@@ -1,16 +0,0 @@
-#!/usr/bin/env ruby
-command =<<EOF
-total_occurrences=`expr $(frequency-list | awk '{print $1}' | xargs | sed 's/ / + /g')`
-unknown_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | xargs | sed 's/ / + /g')`
-next_500_occurrences=`expr $(frequency-list --unknown | awk '{print $1}' | head -500 | xargs | sed 's/ / + /g')`
-echo Total occurrences: $total_occurrences
-echo Unknown occurrences: $unknown_occurrences
-echo Your current vocabulary knowledge covers $(echo "scale=2;($total_occurrences - $unknown_occurrences) / $total_occurrences * 100" | bc -q)% of all occurrences
-echo The next 500 words will bring your cover to $(echo "scale=2;($total_occurrences - $unknown_occurrences + $next_500_occurrences) / $total_occurrences * 100" | bc -q)%
-EOF
-system command

data/lib/lookup.rb DELETED Viewed

@@ -1,15 +0,0 @@
-module Lookup
-	def self.fetch_definition word
-		definitions = `dict "#{word}" 2>/dev/null | grep '     ' | head -2`.chomp.gsub("     ","").split(/[\r\n]/)
-		definitions.uniq.join(" -- ")
-	end
-	def self.sanitize word
-		word.gsub(/[,\.]/,"")
-	end
-	def self.go words
-		words.map{|w| sanitize w}.map{|w| "#{w}\t#{fetch_definition w}"}.join("\n")
-	end
-end