tomereader 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ I see a narrow face, wide, round eyes, and a long, thin nose—I still look
2
+ like a little girl, though sometime in the last few months I turned sixteen. The other factions celebrate birthdays, but we don’t. It
3
+ would be self-indulgent.
4
+ My older brother, Caleb, stands in the aisle, holding a railing above his head to keep himself steady. We don’t look alike.
data/lib/tomereader.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "logging"
2
+ require 'em-synchrony'
3
+ require 'tempfile'
4
+ require "pdf/reader"
5
+ require "tomereader/version"
6
+ require "tomereader/settings"
7
+ require "tomereader/parser"
8
+ require "tomereader/index"
9
+ require "tomereader/phrase"
10
+ require "tomereader/word"
11
+
12
+ module Tomereader
13
+ end
@@ -0,0 +1,79 @@
1
# In-memory index of a book's text:
# - splits the book content into phrases and words
# - places the phrases and the words in the index
module Tomereader
  class Index
    #include Settings

    # Prepares the empty storages and the patterns used while indexing.
    def initialize
      #@logger = create_logger
      # A phrase ends at a full stop or a semicolon.
      @phrase_split_pattern = /[\.\;]/
      # An ASCII letter optionally followed by letters, apostrophes or hyphens.
      @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
      @word_storage = Hash.new
      @total_words = []
      @phrase_storage = []
    end

    # Indexes +content+ and returns the index itself so calls can be chained.
    #
    # Raises ArgumentError when +content+ is not a String and StandardError
    # when it is an empty String.
    def add(content)
      raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
      raise StandardError, "Content is empty" if content.empty?
      split(content)
      self
    end

    # Cuts +content+ at every phrase delimiter and returns the raw pieces
    # (surrounding whitespace is not stripped here).
    def split_into_phrases(content)
      content.split @phrase_split_pattern
    end

    # Hash of word string => Word for every word stored so far.
    def words
      @word_storage
    end

    # Array of every Phrase stored so far, in insertion order.
    def phrases
      @phrase_storage
    end

    # Stored words ordered by their string, optionally limited to the first
    # +count+ entries. Returns a Hash of word string => Word.
    def words_sorted_by_alphabet(count=nil)
      first_pairs_as_hash(words.sort_by { |name, _word| name }, count)
    end

    # Stored words ordered by ascending Word#frequency, optionally limited to
    # the first +count+ entries. Returns a Hash of word string => Word.
    def words_sorted_by_frequency(count=nil)
      first_pairs_as_hash(words.sort_by { |_name, word| word.frequency }, count)
    end

    # Despite its name this returns a Hash of counters, not a String:
    # every word string seen, the number of distinct stored words and the
    # number of stored phrases.
    def to_s
      {total: @total_words.count, unique_count: @word_storage.count, phrases: @phrase_storage.count}
    end

    # Splits the text into phrases, extracts the words and links them both
    # ways: phrase -> words and word -> phrases.
    # Returns the Array of Phrase objects created by this call.
    def split(content)
      split_into_phrases(content).map do |phrase_string|
        phrase = Phrase.new(phrase_string)
        phrase.split do |word_string, position|
          # Every token counts towards the total, even if it is not stored.
          @total_words << word_string
          word = find_or_create(word_string)
          # The block's value (a Word or nil) is what Phrase#split collects.
          word.add(phrase, position) if word.is_a? Word
        end
        @phrase_storage << phrase
        phrase
      end
    end

    # Truthy when +word_string+ matches the word pattern. The pattern is not
    # anchored, so any string containing at least one ASCII letter passes.
    def suitable? word_string
      word_string =~ @word_pattern
    end

    # Returns the stored Word for +word_string+, or nil when there is none.
    def find(word_string)
      @word_storage[word_string]
    end

    # Stores and returns a new Word for +word_string+ when it passes #check;
    # returns nil otherwise. An existing entry for the same string is replaced.
    def create(word_string)
      @word_storage[word_string] = Word.new(word_string) if check word_string
    end

    # Truthy when +word_string+ is a String that is #suitable?.
    def check(word_string)
      word_string.kind_of?(String) && suitable?(word_string)
    end

    # Returns the stored Word for +word_string+, creating it when missing.
    def find_or_create(word_string)
      find(word_string) || create(word_string)
    end

    private

    # Turns sorted [key, value] pairs into a Hash, keeping only the first
    # +count+ pairs when +count+ is given.
    def first_pairs_as_hash(pairs, count)
      pairs = pairs.first(count) unless count.nil?
      Hash[pairs]
    end
  end
end
@@ -0,0 +1,2 @@
1
+ module Tomereader
2
+ end
@@ -0,0 +1,36 @@
1
module Tomereader
  # Extracts the plain text of a book file.
  #
  # 'txt' files are read directly, 'pdf' files are piped through the external
  # +pdftotext+ tool, and every other format is converted with the external
  # +ebook-convert+ tool.
  class Parser
    attr_reader :format_pattern, :filename, :reader

    # filename - path to an existing book file.
    #
    # Raises ArgumentError when +filename+ is nil or empty and StandardError
    # when the file does not exist.
    def initialize(filename)
      raise ArgumentError, "Specify correct filename" if filename.nil? || filename.empty?
      raise StandardError, "File #{filename} not exists" unless File.exist? filename
      @filename = filename
      # Captures the 3-4 character extension at the very end of the path;
      # case-insensitive so "BOOK.PDF" is recognised as well.
      @format_pattern = /[a-z0-9_\-\.]+\.([a-z0-9]{3,4})\z/i
    end

    # Returns the lower-cased file extension (e.g. "pdf").
    # Also remembers the match in @match, which #read uses to name its
    # temporary file.
    #
    # Raises StandardError when the file name has no recognisable extension.
    def format
      @match = format_pattern.match(filename)
      raise StandardError, "Format is undefined" unless @match
      @match[1].downcase
    end

    # Returns the book's text as a String.
    #
    # Raises StandardError when the format is undefined or when the
    # ebook-convert command does not finish successfully.
    def read
      case format
      when 'pdf'
        #TODO: check if pdftotext installed
        # Arguments are passed as an array so the file name never goes
        # through a shell; the block form closes the pipe when done.
        IO.popen(['pdftotext', filename.to_s, '-']) { |pipe| pipe.read }
      when 'txt'
        File.read(filename)
      else
        convert_to_text
      end
    end

    # Number of pages reported by PDF::Reader for the file.
    # NOTE(review): only meaningful for PDF input - confirm that callers never
    # invoke this for other formats.
    def pages_count
      @reader ||= PDF::Reader.new(filename)
      reader.page_count
    end

    private

    # Converts the book into a temporary .txt file with ebook-convert and
    # returns the text; the temporary file is removed even when it fails.
    def convert_to_text
      temp_file = Tempfile.new([@match[0], '.txt'])
      begin
        converted = system('ebook-convert', filename.to_s, temp_file.path)
        raise StandardError, "Conversion of #{filename} failed" unless converted
        # Read through the path rather than the open handle, in case the
        # converter replaced the file instead of writing into it.
        File.read(temp_file.path)
      ensure
        temp_file.close
        temp_file.unlink
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Tomereader
  # One phrase of the book together with the words collected from it.
  class Phrase
    #include Settings
    attr_reader :words

    # phrase_string - raw text of the phrase; surrounding whitespace is removed.
    def initialize(phrase_string)
      @phrase_string = phrase_string.strip
      # Tokens are separated by runs of whitespace, commas, semicolons,
      # double quotes or em dashes.
      @word_pattern = /[\s,;\"\—]+/
      @words = []
      #@logger = create_logger
    end

    # The stripped text of the phrase.
    def to_s
      @phrase_string
    end

    # The phrase cut into raw word strings.
    def word_strings
      @phrase_string.split @word_pattern
    end

    # Splits the phrase into words.
    #
    # Yields every word string with its position; whenever the block returns
    # a Word it is appended to #words.
    #
    # Returns the number of collected words, false when the phrase already
    # has words, or nil when an error occurred while splitting. The error is
    # reported on standard error instead of being dropped silently.
    def split
      return false unless words.empty?
      word_strings.each_with_index do |word_string, position|
        word = yield(word_string, position)
        @words << word if word.is_a? Word
      end
      words.count
    rescue => e
      warn "Tomereader::Phrase#split failed: #{e.message}"
      nil
    end
  end
end
@@ -0,0 +1,10 @@
1
module Tomereader
  # Mixin offering shared helpers to the classes that include it.
  module Settings
    # Looks up the Logging logger for the including object, attaches a file
    # appender writing to log/<name>.log and returns that logger.
    #
    # name - base name of the log file; 'output' is used when it is nil.
    def create_logger(name=nil)
      Logging.logger[self].tap do |log|
        log.add_appenders(Logging.appenders.file("log/#{name || 'output'}.log"))
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Tomereader
  # Version string of the gem; the gemspec assigns it to spec.version.
  VERSION = '0.0.1'
end
@@ -0,0 +1,33 @@
1
module Tomereader
  # A word together with the phrases it occurs in.
  class Word
    # :phrases is intentionally not listed here: the explicit #phrases method
    # below is the public reader.
    attr_reader :name

    # word - the word's text.
    def initialize(word)
      @name = word
      # phrase => Array of positions of this word inside that phrase
      @phrases = Hash.new
    end

    # Records that the word occurs in +phrase+ at +position+.
    # Returns self so calls can be chained.
    def add(phrase, position)
      (@phrases[phrase] ||= []) << position
      self
    end

    # Number of distinct phrases the word occurs in.
    def frequency
      @phrases.count
    end

    # Turns a [phrase, positions] pair into
    # {source: <phrase as String>, positions: <Array of positions>}.
    def phrase_format(phrase)
      {source: phrase[0].to_s, positions: phrase[1]}
    end

    # With a block: yields the formatted entry of every phrase and returns
    # the underlying phrase => positions Hash.
    # Without a block: returns an Array of the formatted entries.
    def phrases
      if block_given?
        @phrases.each { |entry| yield phrase_format(entry) }
      else
        @phrases.map { |entry| phrase_format(entry) }
      end
    end

    # "<name> : <number of phrases>"
    def to_s
      "#{name} : #{@phrases.count}"
    end
  end
end
@@ -0,0 +1,87 @@
1
+ lib = File.expand_path('../lib', __FILE__)
2
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
3
+ require 'tomereader'
4
+ require 'logging'
5
+ require 'rspec/logging_helper'
6
+ # This file was generated by the `rspec --init` command. Conventionally, all
7
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
8
+ # The generated `.rspec` file contains `--require spec_helper` which will cause this
9
+ # file to always be loaded, without a need to explicitly require it in any files.
10
+ #
11
+ # Given that it is always loaded, you are encouraged to keep this file as
12
+ # light-weight as possible. Requiring heavyweight dependencies from this file
13
+ # will add to the boot time of your test suite on EVERY test run, even for an
14
+ # individual file that may not need all of that loaded. Instead, make a
15
+ # separate helper file that requires this one and then use it only in the specs
16
+ # that actually need it.
17
+ #
18
+ # The `.rspec` file also contains a few flags that are not defaults but that
19
+ # users commonly want.
20
+ #
21
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
22
RSpec.configure do |config|
  # Mix the Logging gem's RSpec helper in at the top level and switch on
  # capturing of log messages for the examples.
  include RSpec::LoggingHelper
  config.capture_log_messages

  #Logging.logger.root.appenders = Logging.appenders.file('log/output.log')

  # Everything below is the set of defaults suggested by `rspec --init`.
  # It is kept for reference but is disabled; uncomment what you need.
  #
  # Limit a run to the examples or groups tagged with `:focus`; when nothing
  # is tagged, all examples run.
  #   config.filter_run :focus
  #   config.run_all_when_everything_filtered = true
  #
  # Use the more verbose documentation formatter when a single spec file is
  # run, unless a formatter was already configured (e.g. on the command line).
  #   if config.files_to_run.one?
  #     config.default_formatter = 'doc'
  #   end
  #
  # Print the 10 slowest examples and example groups at the end of the run.
  #   config.profile_examples = 10
  #
  # Run specs in random order to surface order dependencies. To debug one,
  # fix the order with the seed printed after each run:
  #     --seed 1234
  #   config.order = :random
  #
  # Seed global randomization with the `--seed` CLI option so that failures
  # caused by randomization can be reproduced with the same seed.
  #   Kernel.srand config.seed
  #
  # rspec-expectations configuration; an alternate assertion library such as
  # wrong or the stdlib/minitest assertions may be used instead.
  #   config.expect_with :rspec do |expectations|
  #     # Enable only the newer, non-monkey-patching expect syntax.
  #     # For more details, see:
  #     #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  #     expectations.syntax = :expect
  #   end
  #
  # rspec-mocks configuration; an alternate test double library (such as
  # bogus or mocha) may be chosen by changing the `mock_with` option.
  #   config.mock_with :rspec do |mocks|
  #     # Enable only the newer, non-monkey-patching expect syntax.
  #     # For more details, see:
  #     #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #     mocks.syntax = :expect
  #
  #     # Prevents you from mocking or stubbing a method that does not exist
  #     # on a real object. This is generally recommended.
  #     mocks.verify_partial_doubles = true
  #   end
end
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the parser and the index, driven by the sample books kept in the
# data directory next to the spec directory.
describe Tomereader do
  before do
    @path = File.expand_path(File.dirname(__FILE__) + "/../data")
    filename = "three_phrases.txt"
    @file = File.join(@path, filename)
  end

  let(:parser) { Tomereader::Parser.new(@file) }

  context Tomereader::Parser do
    it "has correct filename path" do
      expect(File.exist? @file).to eq true
    end

    it "has content" do
      expect(parser.read.length).to be > 0
    end

    it "converts pdf to txt and reads" do
      filename = "evented-spec.pdf"
      file = File.join(@path, filename)
      expect(File.exist?(file)).to eq true
      parser = Tomereader::Parser.new(file)
      expect(parser.read.length).to be > 0
    end

    it "creates temp file" do
      temp_file = Tempfile.new(["test", '.txt'])
      expect(File.exist?(temp_file.path)).to eq true
      temp_file.close
      temp_file.unlink
    end

    it "converts fb2 to txt and reads" do
      filename = "stormrage.fb2"
      file = File.join(@path, filename)
      expect(File.exist?(file)).to eq true
      parser = Tomereader::Parser.new(file)
      expect(parser.read.length).to be > 0
    end
  end

  context Tomereader::Index do
    let(:content) { parser.read }
    let(:word) { "tomereader" }
    let(:index) { Tomereader::Index.new }
    # Expected counters for the three_phrases.txt sample.
    let(:book_info) { {:total=>64, :phrases=>5} }

    before(:each) { index.add(content) }

    it "creates word in word storage" do
      expect(index.create(word)).to be_a Tomereader::Word
    end

    it "finds word in word storage" do
      index.create(word)
      expect(index.find(word)).to be_a Tomereader::Word
    end

    it "empty word is not suitable for storage" do
      expect(index.suitable? "").to_not eq true
    end

    it "split_into_phrases" do
      expect(index.split_into_phrases(content).count).to eq book_info[:phrases]
    end

    it "#split" do
      # Index#to_s returns a Hash of counters, hence the [:total] lookup.
      expect(index.to_s[:total]).to eq book_info[:total]
    end

    it "shows word's list of phrases" do
      word = index.find('alike')
      phrases = word.phrases
      expect(phrases.count).to eq 1
      expect(phrases[0][:source]).to eq "We don’t look alike"
      expect(phrases[0][:positions]).to be_a_kind_of Array
      expect(phrases[0][:positions][0]).to eq 3
    end

    it "shows word's list sorted by frequency" do
      hash = index.words_sorted_by_frequency
      expect(hash.first[0]).to eq "I"
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'tomereader/version'
5
+
6
Gem::Specification.new do |spec|
  spec.name          = "tomereader"
  spec.version       = Tomereader::VERSION
  spec.authors       = ["nychka"]
  spec.email         = ["nychka93@gmail.com"]
  spec.summary       = %q{Tomereader will help you to read English books}
  spec.description   = %q{Tomereader will help you to learn English by reading your favourites books}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # Runtime dependencies: the library's entry file requires these gems when
  # it is loaded, so they must be installed together with tomereader.
  # (tempfile is also required there, but it ships with Ruby's standard
  # library and therefore needs no dependency entry.)
  spec.add_dependency "logging"
  spec.add_dependency "em-synchrony"
  spec.add_dependency "pdf-reader", "~>1.3"

  # Needed only for developing and testing the gem.
  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~>3.0"
end
metadata ADDED
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tomereader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - nychka
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-09-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: pdf-reader
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: logging
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: em-synchrony
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: tempfile
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: Tomereader will help you to learn English by reading your favourites
127
+ books
128
+ email:
129
+ - nychka93@gmail.com
130
+ executables: []
131
+ extensions: []
132
+ extra_rdoc_files: []
133
+ files:
134
+ - .autotest
135
+ - .gitignore
136
+ - .rspec
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - data/content.txt
142
+ - data/divergent.txt
143
+ - data/divergent_1.pdf
144
+ - data/divergent_1.txt
145
+ - data/evented-spec.pdf
146
+ - data/stormrage.fb2
147
+ - data/three_phrases.txt
148
+ - lib/tomereader.rb
149
+ - lib/tomereader/index.rb
150
+ - lib/tomereader/logger.rb
151
+ - lib/tomereader/parser.rb
152
+ - lib/tomereader/phrase.rb
153
+ - lib/tomereader/settings.rb
154
+ - lib/tomereader/version.rb
155
+ - lib/tomereader/word.rb
156
+ - spec/spec_helper.rb
157
+ - spec/tomereader_spec.rb
158
+ - tomereader.gemspec
159
+ homepage: ''
160
+ licenses:
161
+ - MIT
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ none: false
168
+ requirements:
169
+ - - '>='
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
172
+ required_rubygems_version: !ruby/object:Gem::Requirement
173
+ none: false
174
+ requirements:
175
+ - - '>='
176
+ - !ruby/object:Gem::Version
177
+ version: '0'
178
+ requirements: []
179
+ rubyforge_project:
180
+ rubygems_version: 1.8.29
181
+ signing_key:
182
+ specification_version: 3
183
+ summary: Tomereader will help you to read English books
184
+ test_files:
185
+ - spec/spec_helper.rb
186
+ - spec/tomereader_spec.rb