RubyGems - tomereader - Versions diffs - 0.0.1 - Mend

tomereader 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

data/data/three_phrases.txt ADDED Viewed

@@ -0,0 +1,4 @@
+I see a narrow face, wide, round eyes, and a long, thin nose—I still look
+like a little girl, though sometime in the last few months I turned sixteen. The other factions celebrate birthdays, but we don’t. It
+would be self-indulgent.
+My older brother, Caleb, stands in the aisle, holding a railing above his head to keep himself steady. We don’t look alike.

data/lib/tomereader.rb ADDED Viewed

@@ -0,0 +1,13 @@
+require "logging"
+require 'em-synchrony'
+require 'tempfile'
+require "pdf/reader"
+require "tomereader/version"
+require "tomereader/settings"
+require "tomereader/parser"
+require "tomereader/index"
+require "tomereader/phrase"
+require "tomereader/word"
+module Tomereader # top-level namespace; intentionally empty here, its classes come from the tomereader/* files required above
+end

data/lib/tomereader/index.rb ADDED Viewed

@@ -0,0 +1,79 @@
+# - розбиває контент книги на фрази та слова
+# - розміщує фрази та слова в індексі
+module Tomereader
+  class Index
+    #include Settings
+    def initialize
+      #raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
+      #raise StandardError, "Content is empty" if content.empty?
+      #@logger = create_logger
+      @phrase_split_pattern = /[\.\;]/
+      @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
+      @word_storage = Hash.new
+      @total_words = []
+      @phrase_storage = []
+    end
+    def add(content)
+      raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
+      raise StandardError, "Content is empty" if content.empty?
+      phrase_strings = split_into_phrases(content)
+      split(content)
+      self
+    end
+    def split_into_phrases(content)
+      content.split @phrase_split_pattern
+    end
+    def words
+      @word_storage
+    end
+    def phrases
+      @phrase_storage
+    end
+    def words_sorted_by_alphabet(count=nil)
+      words = self.words.sort_by{|key, value| key}
+      words = words.first(count) unless count.nil?
+      Hash[words]
+    end
+    def words_sorted_by_frequency(count=nil)
+      words = self.words.sort_by{|key, value| value.frequency}
+      words = words.first(count) unless count.nil?
+      Hash[words]
+    end
+    def to_s
+      {total: @total_words.count, unique_count: @word_storage.count, phrases: @phrase_storage.count}
+    end
+    # розбиває текст на фрази, витягує слова,
+    # встановлює звязки:  фраза -> слова, та слово -> фрази
+    def split(content)
+      split_into_phrases(content).map do |phrase_string|
+        phrase = Phrase.new(phrase_string)
+        phrase.split do |word_string, position|
+          @total_words << word_string
+          word = find_or_create(word_string)
+          word.add(phrase, position) if word.is_a? Word
+        end
+        @phrase_storage << phrase
+      end
+  end
+  # word word_storage
+  def suitable? word_string
+    word_string =~ @word_pattern
+  end
+  def find(word_string)
+    if @word_storage.has_key?(word_string)
+      @word_storage[word_string]
+    end
+  end
+  def create(word_string)
+    if check word_string
+      @word_storage[word_string] = Word.new(word_string)
+    end
+  end
+  def check(word_string)
+    word_string.kind_of?(String) && suitable?(word_string)
+  end
+  def find_or_create(word_string)
+    find(word_string) || create(word_string)
+  end
+end
+end

data/lib/tomereader/logger.rb ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ module Tomereader
2	+ end

data/lib/tomereader/parser.rb ADDED Viewed

@@ -0,0 +1,36 @@
+module Tomereader
+  class Parser
+    attr_reader :format_pattern, :filename, :reader
+    def initialize(filename)
+      raise ArgumentError, "Specify correct filename" if not filename and filename.empty?
+      raise StandardError, "File #{filename} not exists" unless File.exists? filename
+      @filename = filename
+      @format_pattern = /[a-z0-9_\-\.]+\.([a-z0-9]{3,4})$/
+    end
+    def format
+      @match = format_pattern.match(filename)
+      format = @match[1]
+      raise StandardError, "Format is undefined" unless @match && format
+      format
+    end
+    def read
+      case format
+      when 'pdf'
+        #TODO: check if pdftotext installed
+        open("|pdftotext #{filename} -").read()
+      when 'txt'
+        File.read(filename)
+      else
+        temp_file = Tempfile.new([@match[0], '.txt'])
+        system("ebook-convert #{filename} #{temp_file.path}")
+        content = temp_file.read
+        temp_file.close
+        temp_file.unlink
+        content
+      end
+    end
+    def pages_count
+      reader.page_count
+    end
+  end
+end

data/lib/tomereader/phrase.rb ADDED Viewed

@@ -0,0 +1,32 @@
+module Tomereader
+  class Phrase
+    #include Settings
+    attr_reader :words
+    def initialize(phrase_string)
+      @phrase_string = phrase_string.strip
+      @word_pattern = /[\s,;\"\—]+/
+      @words = []
+      #@logger = create_logger
+    end
+    def to_s
+      @phrase_string
+    end
+    def word_strings
+      @phrase_string.split @word_pattern
+    end
+    # split phrase into words
+    # @return Array of words
+    def split
+      return false if words.count > 0
+      begin
+        word_strings.each_with_index do |word_string, position|
+          word = yield(word_string, position)
+          @words << word if word.is_a? Word
+        end
+        words.count
+      rescue => e
+        #@logger.warn e.message
+      end
+    end
+  end
+end

data/lib/tomereader/settings.rb ADDED Viewed

@@ -0,0 +1,10 @@
+ module Tomereader
+   module Settings
+     def create_logger(name=nil)
+      name ||= 'output'
+      logger = Logging.logger[self]
+      logger.add_appenders(Logging.appenders.file("log/#{name}.log"))
+      return logger
+    end
+  end
+end

data/lib/tomereader/version.rb ADDED Viewed

@@ -0,0 +1,3 @@
+module Tomereader
+  VERSION = "0.0.1"
+end

data/lib/tomereader/word.rb ADDED Viewed

@@ -0,0 +1,33 @@
+module Tomereader
+  class Word
+    attr_reader :name, :phrases
+    def initialize(word)
+      @name = word
+      @phrases = Hash.new
+    end
+    def add(phrase, position)
+      if @phrases.has_key? phrase
+        @phrases[phrase] << position
+      else
+        @phrases[phrase] = [position]
+      end
+      self
+    end
+    def frequency
+      @phrases.count
+    end
+    def phrase_format(phrase)
+      {source: phrase[0].to_s, positions: phrase[1]}
+    end
+    def phrases
+      if block_given?
+        @phrases.each {|phrase| yield phrase_format(phrase)}
+      else
+        @phrases.map{|phrase| phrase_format(phrase)}
+      end
+    end
+    def to_s
+      "#{name} : #{@phrases.count}"
+    end
+  end
+end

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,87 @@
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'tomereader'
+require 'logging'
+require 'rspec/logging_helper'
+# This file was generated by the `rspec --init` command. Conventionally, all
+# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
+# The generated `.rspec` file contains `--require spec_helper` which will cause this
+# file to always be loaded, without a need to explicitly require it in any files.
+#
+# Given that it is always loaded, you are encouraged to keep this file as
+# light-weight as possible. Requiring heavyweight dependencies from this file
+# will add to the boot time of your test suite on EVERY test run, even for an
+# individual file that may not need all of that loaded. Instead, make a
+# separate helper file that requires this one and then use it only in the specs
+# that actually need it.
+#
+# The `.rspec` file also contains a few flags that are not defaults but that
+# users commonly want.
+#
+# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
+RSpec.configure do |config|
+  include RSpec::LoggingHelper # helper loaded by the 'rspec/logging_helper' require above
+  config.capture_log_messages # provided by that helper; presumably captures Logging output during examples - confirm against the logging gem docs
+  #Logging.logger.root.appenders = Logging.appenders.file('log/output.log')
+# The settings below are suggested to provide a good initial experience
+# with RSpec, but feel free to customize to your heart's content.
+=begin
+  # These two settings work together to allow you to limit a spec run
+  # to individual examples or groups you care about by tagging them with
+  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
+  # get run.
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
+  # Many RSpec users commonly either run the entire suite or an individual
+  # file, and it's useful to allow more verbose output when running an
+  # individual spec file.
+  if config.files_to_run.one?
+    # Use the documentation formatter for detailed output,
+    # unless a formatter has already been configured
+    # (e.g. via a command-line flag).
+    config.default_formatter = 'doc'
+  end
+  # Print the 10 slowest examples and example groups at the
+  # end of the spec run, to help surface which specs are running
+  # particularly slow.
+  config.profile_examples = 10
+  # Run specs in random order to surface order dependencies. If you find an
+  # order dependency and want to debug it, you can fix the order by providing
+  # the seed, which is printed after each run.
+  #     --seed 1234
+  config.order = :random
+  # Seed global randomization in this process using the `--seed` CLI option.
+  # Setting this allows you to use `--seed` to deterministically reproduce
+  # test failures related to randomization by passing the same `--seed` value
+  # as the one that triggered the failure.
+  Kernel.srand config.seed
+  # rspec-expectations config goes here. You can use an alternate
+  # assertion/expectation library such as wrong or the stdlib/minitest
+  # assertions if you prefer.
+  config.expect_with :rspec do |expectations|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
+    expectations.syntax = :expect
+  end
+  # rspec-mocks config goes here. You can use an alternate test double
+  # library (such as bogus or mocha) by changing the `mock_with` option here.
+  config.mock_with :rspec do |mocks|
+    # Enable only the newer, non-monkey-patching expect syntax.
+    # For more details, see:
+    #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
+    mocks.syntax = :expect
+    # Prevents you from mocking or stubbing a method that does not exist on
+    # a real object. This is generally recommended.
+    mocks.verify_partial_doubles = true
+  end
+=end
+end

data/spec/tomereader_spec.rb ADDED Viewed

@@ -0,0 +1,76 @@
+require 'spec_helper'
+describe Tomereader do
+  before do
+    @path = File.expand_path(File.dirname(__FILE__) + "/../data")
+    filename = "three_phrases.txt"
+    @file = File.join(@path, filename)
+  end
+  let(:parser) { Tomereader::Parser.new(@file) }
+  context Tomereader::Parser do
+    it "has correct filename path" do
+      expect(File.exists? @file).to eq true
+    end
+    it "has content" do
+      expect(parser.read.length).to be > 0
+    end
+    it "converts pdf to txt and reads" do
+      filename = "evented-spec.pdf"
+      file = File.join(@path, filename)
+      expect(File.exists?(file)).to eq true
+      parser = Tomereader::Parser.new(file)
+      expect(parser.read.length).to be > 0
+    end
+    it "creates temp file" do
+      temp_file = Tempfile.new(["test", '.txt'])
+      expect(File.exists?(temp_file)).to eq true
+      temp_file.close
+      temp_file.unlink
+    end
+    it "converts fb2 to txt and reads" do
+      filename = "stormrage.fb2"
+      file = File.join(@path, filename)
+      expect(File.exists?(file)).to eq true
+      parser = Tomereader::Parser.new(file)
+      expect(parser.read.length).to be > 0
+    end
+  end
+  context Tomereader::Index do
+    let(:content) { parser.read }
+    let(:word) { "tomereader" }
+    let(:index) { Tomereader::Index.new}
+    let(:book_info) { {:total=>64, :phrases=>5} }
+    before(:each){index.add(content)}
+    it "creates word in word storage" do
+      expect(index.create(word)).to be_a Tomereader::Word
+    end
+    it "finds word in word storage" do
+      index.create(word)
+      expect(index.find(word)).to be_a Tomereader::Word
+    end
+    it "empty word is not suitable for storage" do
+      expect(index.suitable? "").to_not eq true
+    end
+    it "split_into_phrases" do
+      expect(index.split_into_phrases(content).count).to eq book_info[:phrases]
+    end
+    it "#split" do
+      expect(index.to_s[:total]).to eq book_info[:total]
+    end
+    it "shows word's list of phrases" do
+      word = index.find('alike')
+      phrases = word.phrases
+      expect(phrases.count).to eq 1
+      expect(phrases[0][:source]).to eq "We don’t look alike"
+      expect(phrases[0][:positions]).to be_a_kind_of Array
+      expect(phrases[0][:positions][0]).to eq 3
+    end
+    it "shows word's list sorted by frequency" do
+      hash = index.words_sorted_by_frequency
+      expect(hash.first[0]).to eq "I"
+    end
+  end
+end

data/tomereader.gemspec ADDED Viewed

@@ -0,0 +1,28 @@
+# coding: utf-8
+lib = File.expand_path('../lib', __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require 'tomereader/version'
+Gem::Specification.new do |spec|
+  spec.name          = "tomereader"
+  spec.version       = Tomereader::VERSION
+  spec.authors       = ["nychka"]
+  spec.email         = ["nychka93@gmail.com"]
+  spec.summary       = %q{Tomereader will help you to read English books}
+  spec.description   = %q{Tomereader will help you to learn English by reading your favourites books}
+  spec.homepage      = ""
+  spec.license       = "MIT"
+  spec.files         = `git ls-files -z`.split("\x0")
+  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
+  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
+  spec.require_paths = ["lib"]
+  spec.add_development_dependency "bundler", "~> 1.5"
+  spec.add_development_dependency "rake"
+  spec.add_development_dependency "pdf-reader", "~>1.3"
+  spec.add_development_dependency "rspec", "~>3.0"
+  spec.add_development_dependency "logging"
+  spec.add_development_dependency "em-synchrony"
+  spec.add_development_dependency "tempfile"
+end

metadata ADDED Viewed

@@ -0,0 +1,186 @@
+--- !ruby/object:Gem::Specification
+name: tomereader
+version: !ruby/object:Gem::Version
+  version: 0.0.1
+  prerelease:
+platform: ruby
+authors:
+- nychka
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2014-09-07 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: bundler
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: pdf-reader
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '1.3'
+- !ruby/object:Gem::Dependency
+  name: rspec
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '3.0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ~>
+      - !ruby/object:Gem::Version
+        version: '3.0'
+- !ruby/object:Gem::Dependency
+  name: logging
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: em-synchrony
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+- !ruby/object:Gem::Dependency
+  name: tempfile
+  requirement: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - '>='
+      - !ruby/object:Gem::Version
+        version: '0'
+description: Tomereader will help you to learn English by reading your favourites
+  books
+email:
+- nychka93@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- .autotest
+- .gitignore
+- .rspec
+- Gemfile
+- LICENSE.txt
+- README.md
+- Rakefile
+- data/content.txt
+- data/divergent.txt
+- data/divergent_1.pdf
+- data/divergent_1.txt
+- data/evented-spec.pdf
+- data/stormrage.fb2
+- data/three_phrases.txt
+- lib/tomereader.rb
+- lib/tomereader/index.rb
+- lib/tomereader/logger.rb
+- lib/tomereader/parser.rb
+- lib/tomereader/phrase.rb
+- lib/tomereader/settings.rb
+- lib/tomereader/version.rb
+- lib/tomereader/word.rb
+- spec/spec_helper.rb
+- spec/tomereader_spec.rb
+- tomereader.gemspec
+homepage: ''
+licenses:
+- MIT
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.29
+signing_key:
+specification_version: 3
+summary: Tomereader will help you to read English books
+test_files:
+- spec/spec_helper.rb
+- spec/tomereader_spec.rb