tomereader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
+ I see a narrow face, wide, round eyes, and a long, thin nose—I still look
2
+ like a little girl, though sometime in the last few months I turned sixteen. The other factions celebrate birthdays, but we don’t. It
3
+ would be self-indulgent.
4
+ My older brother, Caleb, stands in the aisle, holding a railing above his head to keep himself steady. We don’t look alike.
data/lib/tomereader.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "logging"
2
+ require 'em-synchrony'
3
+ require 'tempfile'
4
+ require "pdf/reader"
5
+ require "tomereader/version"
6
+ require "tomereader/settings"
7
+ require "tomereader/parser"
8
+ require "tomereader/index"
9
+ require "tomereader/phrase"
10
+ require "tomereader/word"
11
+
12
# Top-level namespace of the gem. It is declared empty here; the classes that
# live in it come from the files required above.
module Tomereader; end
@@ -0,0 +1,79 @@
1
# - splits the book content into phrases and words
# - stores the phrases and words in the index
module Tomereader
  class Index
    #include Settings

    # Creates an empty index together with the patterns used to split
    # content into phrases and to decide whether a string is a word.
    def initialize
      #@logger = create_logger
      @phrase_split_pattern = /[\.\;]/
      @word_pattern = /[A-Za-z]([A-Za-z\'\-])*/
      @word_storage = Hash.new
      @total_words = []
      @phrase_storage = []
    end

    # Indexes +content+ and returns the index itself so calls can be chained.
    #
    # @param content [String] text to index
    # @return [Index] self
    # @raise [ArgumentError] when content is not a String
    # @raise [StandardError] when content is empty
    def add(content)
      raise ArgumentError, "Content must be a String - #{content.class} given instead" unless content.kind_of? String
      raise StandardError, "Content is empty" if content.empty?
      # #split does the phrase splitting itself, so it is not done here too.
      split(content)
      self
    end

    # @return [Array<String>] content cut at every '.' and ';'
    def split_into_phrases(content)
      content.split @phrase_split_pattern
    end

    # @return [Hash] stored words keyed by their string
    def words
      @word_storage
    end

    # @return [Array] phrases indexed so far
    def phrases
      @phrase_storage
    end

    # Stored words ordered by key.
    #
    # @param count [Integer, nil] keep only the first +count+ entries
    # @return [Hash]
    def words_sorted_by_alphabet(count=nil)
      sorted = words.sort_by { |key, _word| key }
      sorted = sorted.first(count) unless count.nil?
      Hash[sorted]
    end

    # Stored words ordered by ascending +frequency+ of the stored value.
    #
    # @param count [Integer, nil] keep only the first +count+ entries
    # @return [Hash]
    def words_sorted_by_frequency(count=nil)
      sorted = words.sort_by { |_key, word| word.frequency }
      sorted = sorted.first(count) unless count.nil?
      Hash[sorted]
    end

    # NOTE: despite the name this returns a Hash of counters, not a String;
    # the spec reads it as index.to_s[:total], so the shape is kept.
    #
    # @return [Hash] :total, :unique_count and :phrases counters
    def to_s
      {total: @total_words.count, unique_count: @word_storage.count, phrases: @phrase_storage.count}
    end

    # Splits the text into phrases, extracts the words and establishes the
    # links phrase -> words and word -> phrases.
    def split(content)
      split_into_phrases(content).map do |phrase_string|
        phrase = Phrase.new(phrase_string)
        phrase.split do |word_string, position|
          # Every yielded string counts towards the total, suitable or not.
          @total_words << word_string
          word = find_or_create(word_string)
          word.add(phrase, position) if word.is_a? Word
        end
        @phrase_storage << phrase
      end
    end

    # word storage

    # @return [Integer, nil] match offset when the string contains something
    #   matching the word pattern, nil otherwise
    def suitable? word_string
      word_string =~ @word_pattern
    end

    # @return the stored word for +word_string+, or nil when there is none
    def find(word_string)
      @word_storage[word_string]
    end

    # Stores and returns a new Word when +word_string+ passes #check;
    # returns nil otherwise.
    def create(word_string)
      return unless check(word_string)
      @word_storage[word_string] = Word.new(word_string)
    end

    # Truthy when +word_string+ is a String that is #suitable?.
    def check(word_string)
      word_string.kind_of?(String) && suitable?(word_string)
    end

    # Returns the stored word, creating it first when it is missing.
    def find_or_create(word_string)
      find(word_string) || create(word_string)
    end
  end
end
@@ -0,0 +1,2 @@
1
# Reopens the Tomereader namespace without adding anything to it.
module Tomereader; end
@@ -0,0 +1,36 @@
1
module Tomereader
  # Reads a book file as plain text. The format is taken from the file
  # extension: 'txt' is read directly, 'pdf' goes through the external
  # `pdftotext` tool and anything else through `ebook-convert`.
  class Parser
    attr_reader :format_pattern, :filename

    # @param filename [String] path to an existing book file
    # @raise [ArgumentError] when filename is nil or empty
    # @raise [StandardError] when the file does not exist
    def initialize(filename)
      raise ArgumentError, "Specify correct filename" if filename.to_s.empty?
      raise StandardError, "File #{filename} not exists" unless File.exist? filename
      @filename = filename
      @format_pattern = /[a-z0-9_\-\.]+\.([a-z0-9]{3,4})$/
    end

    # Extension captured by format_pattern (lowercase, 3-4 characters).
    #
    # @return [String]
    # @raise [StandardError] when the filename does not match the pattern
    def format
      # The match is kept: #read reuses it to name the temporary file.
      @match = format_pattern.match(filename)
      raise StandardError, "Format is undefined" unless @match
      @match[1]
    end

    # @return [String] text content of the book
    def read
      case format
      when 'pdf'
        # Argument-array form: no shell is involved, so the filename cannot
        # inject commands, and the block form closes the pipe.
        # Raises Errno::ENOENT when pdftotext is not installed.
        IO.popen(['pdftotext', filename, '-'], &:read)
      when 'txt'
        File.read(filename)
      else
        convert_to_text
      end
    end

    # Lazily built PDF reader for the file (PDF::Reader comes from the
    # pdf-reader gem that lib/tomereader.rb requires).
    def reader
      @reader ||= PDF::Reader.new(filename)
    end

    # @return [Integer] page count reported by the PDF reader
    def pages_count
      reader.page_count
    end

    private

    # Converts the book into a temporary .txt file with `ebook-convert` and
    # returns its content; the temporary file is removed even on failure.
    def convert_to_text
      temp_file = Tempfile.new([@match[0], '.txt'])
      begin
        unless system('ebook-convert', filename, temp_file.path)
          raise StandardError, "ebook-convert failed for #{filename}"
        end
        # Read by path so the result is seen even if the converter replaced
        # the file instead of writing into it.
        File.read(temp_file.path)
      ensure
        temp_file.close
        temp_file.unlink
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Tomereader
  # One phrase of the text plus the Word objects collected from it.
  class Phrase
    #include Settings
    attr_reader :words

    # @param phrase_string [String] raw phrase; surrounding whitespace is
    #   stripped
    def initialize(phrase_string)
      @phrase_string = phrase_string.strip
      @word_pattern = /[\s,;\"\—]+/
      @words = []
      #@logger = create_logger
    end

    # @return [String] the stripped phrase text
    def to_s
      @phrase_string
    end

    # @return [Array<String>] pieces of the phrase between delimiters
    def word_strings
      # A phrase that begins with a delimiter (e.g. an opening quote) makes
      # String#split return a leading "", which is not a word.
      @phrase_string.split(@word_pattern).reject(&:empty?)
    end

    # Splits the phrase into words, yielding each word string with its
    # position and keeping every Word the block returns.
    #
    # @yield [word_string, position]
    # @return [Integer] number of collected words
    # @return [false] when the phrase already has words
    # @return [nil] when an error was raised while splitting
    def split
      return false if words.count > 0
      begin
        word_strings.each_with_index do |word_string, position|
          word = yield(word_string, position)
          @words << word if word.is_a? Word
        end
        words.count
      rescue => e
        # Best effort: one bad phrase must not abort the whole indexing run,
        # but the failure is reported instead of vanishing silently.
        warn "Tomereader::Phrase#split failed: #{e.message}"
        nil
      end
    end
  end
end
@@ -0,0 +1,10 @@
1
module Tomereader
  # Mixin that gives the including object a file-backed logger.
  module Settings
    # Looks up the logger registered for the receiver and attaches a file
    # appender that writes to log/<name>.log.
    #
    # @param name [String, nil] base name of the log file; 'output' when nil
    # @return the logger obtained from Logging.logger[self]
    def create_logger(name=nil)
      Logging.logger[self].tap do |log|
        log.add_appenders(Logging.appenders.file("log/#{name || 'output'}.log"))
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module Tomereader
  # Release number of the gem; tomereader.gemspec reads it as spec.version.
  VERSION = "0.0.1"
end
@@ -0,0 +1,33 @@
1
module Tomereader
  # A word of the text together with the phrases it was found in and its
  # positions inside each of them.
  class Word
    # Only :name gets a generated reader; #phrases is defined explicitly
    # below, so generating it as well would just be overridden.
    attr_reader :name

    # @param word [String] the word itself
    def initialize(word)
      @name = word
      @phrases = {}
    end

    # Records that the word occurs in +phrase+ at +position+.
    #
    # @return [Word] self, so calls can be chained
    def add(phrase, position)
      (@phrases[phrase] ||= []) << position
      self
    end

    # @return [Integer] number of distinct phrases containing the word
    def frequency
      @phrases.count
    end

    # @param phrase [Array] a [phrase, positions] pair
    # @return [Hash] { source: phrase text, positions: positions }
    def phrase_format(phrase)
      {source: phrase[0].to_s, positions: phrase[1]}
    end

    # With a block, yields the formatted entry of every phrase; without one,
    # returns the formatted entries as an Array.
    def phrases
      if block_given?
        @phrases.each { |phrase| yield phrase_format(phrase) }
      else
        @phrases.map { |phrase| phrase_format(phrase) }
      end
    end

    # @return [String] "<name> : <number of phrases>"
    def to_s
      "#{name} : #{@phrases.count}"
    end
  end
end
@@ -0,0 +1,87 @@
1
# This file sits one directory below the project root, so the library
# directory is two levels up from __FILE__ ('../lib' would be spec/lib).
lib = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tomereader'
require 'logging'
require 'rspec/logging_helper'
# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause this
# file to always be loaded, without a need to explicitly require it in any files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, make a
# separate helper file that requires this one and then use it only in the specs
# that actually need it.
#
# The `.rspec` file also contains a few flags that are not defaults but that
# users commonly want.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  include RSpec::LoggingHelper
  config.capture_log_messages

  #Logging.logger.root.appenders = Logging.appenders.file('log/output.log')
  # The settings below are suggested to provide a good initial experience
  # with RSpec, but feel free to customize to your heart's content.
  # They are disabled; line comments are used because =begin/=end is only
  # recognised at the start of a line.
  #
  #   # These two settings work together to allow you to limit a spec run
  #   # to individual examples or groups you care about by tagging them with
  #   # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  #   # get run.
  #   config.filter_run :focus
  #   config.run_all_when_everything_filtered = true
  #
  #   # Many RSpec users commonly either run the entire suite or an individual
  #   # file, and it's useful to allow more verbose output when running an
  #   # individual spec file.
  #   if config.files_to_run.one?
  #     # Use the documentation formatter for detailed output,
  #     # unless a formatter has already been configured
  #     # (e.g. via a command-line flag).
  #     config.default_formatter = 'doc'
  #   end
  #
  #   # Print the 10 slowest examples and example groups at the
  #   # end of the spec run, to help surface which specs are running
  #   # particularly slow.
  #   config.profile_examples = 10
  #
  #   # Run specs in random order to surface order dependencies. If you find an
  #   # order dependency and want to debug it, you can fix the order by providing
  #   # the seed, which is printed after each run.
  #   #     --seed 1234
  #   config.order = :random
  #
  #   # Seed global randomization in this process using the `--seed` CLI option.
  #   # Setting this allows you to use `--seed` to deterministically reproduce
  #   # test failures related to randomization by passing the same `--seed` value
  #   # as the one that triggered the failure.
  #   Kernel.srand config.seed
  #
  #   # rspec-expectations config goes here. You can use an alternate
  #   # assertion/expectation library such as wrong or the stdlib/minitest
  #   # assertions if you prefer.
  #   config.expect_with :rspec do |expectations|
  #     # Enable only the newer, non-monkey-patching expect syntax.
  #     # For more details, see:
  #     #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
  #     expectations.syntax = :expect
  #   end
  #
  #   # rspec-mocks config goes here. You can use an alternate test double
  #   # library (such as bogus or mocha) by changing the `mock_with` option here.
  #   config.mock_with :rspec do |mocks|
  #     # Enable only the newer, non-monkey-patching expect syntax.
  #     # For more details, see:
  #     #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #     mocks.syntax = :expect
  #
  #     # Prevents you from mocking or stubbing a method that does not exist on
  #     # a real object. This is generally recommended.
  #     mocks.verify_partial_doubles = true
  #   end
end
@@ -0,0 +1,76 @@
1
require 'spec_helper'

# Exercises Parser and Index against the fixture files in the data directory.
describe Tomereader do
  before do
    @path = File.expand_path(File.dirname(__FILE__) + "/../data")
    filename = "three_phrases.txt"
    @file = File.join(@path, filename)
  end

  let(:parser) { Tomereader::Parser.new(@file) }

  context Tomereader::Parser do
    # File.exist? is used throughout: File.exists? was removed in Ruby 3.2.
    it "has correct filename path" do
      expect(File.exist?(@file)).to eq true
    end
    it "has content" do
      expect(parser.read.length).to be > 0
    end
    it "converts pdf to txt and reads" do
      filename = "evented-spec.pdf"
      file = File.join(@path, filename)
      expect(File.exist?(file)).to eq true
      parser = Tomereader::Parser.new(file)
      expect(parser.read.length).to be > 0
    end
    it "creates temp file" do
      temp_file = Tempfile.new(["test", '.txt'])
      expect(File.exist?(temp_file.path)).to eq true
      temp_file.close
      temp_file.unlink
    end
    it "converts fb2 to txt and reads" do
      filename = "stormrage.fb2"
      file = File.join(@path, filename)
      expect(File.exist?(file)).to eq true
      parser = Tomereader::Parser.new(file)
      expect(parser.read.length).to be > 0
    end
  end

  context Tomereader::Index do
    let(:content) { parser.read }
    let(:word) { "tomereader" }
    let(:index) { Tomereader::Index.new }
    # Expected counters for the three_phrases.txt fixture.
    let(:book_info) { {:total=>64, :phrases=>5} }

    before(:each) { index.add(content) }

    it "creates word in word storage" do
      expect(index.create(word)).to be_a Tomereader::Word
    end
    it "finds word in word storage" do
      index.create(word)
      expect(index.find(word)).to be_a Tomereader::Word
    end
    it "empty word is not suitable for storage" do
      expect(index.suitable? "").to_not eq true
    end
    it "split_into_phrases" do
      expect(index.split_into_phrases(content).count).to eq book_info[:phrases]
    end
    it "#split" do
      expect(index.to_s[:total]).to eq book_info[:total]
    end
    it "shows word's list of phrases" do
      word = index.find('alike')
      phrases = word.phrases
      expect(phrases.count).to eq 1
      expect(phrases[0][:source]).to eq "We don’t look alike"
      expect(phrases[0][:positions]).to be_a_kind_of Array
      expect(phrases[0][:positions][0]).to eq 3
    end
    it "shows word's list sorted by frequency" do
      hash = index.words_sorted_by_frequency
      expect(hash.first[0]).to eq "I"
    end
  end
end
@@ -0,0 +1,28 @@
1
# coding: utf-8
# Gem specification for tomereader.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'tomereader/version'

Gem::Specification.new do |spec|
  spec.name          = "tomereader"
  spec.version       = Tomereader::VERSION
  spec.authors       = ["nychka"]
  spec.email         = ["nychka93@gmail.com"]
  spec.summary       = %q{Tomereader will help you to read English books}
  spec.description   = %q{Tomereader will help you to learn English by reading your favourites books}
  spec.homepage      = ""
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  # lib/tomereader.rb requires these at load time, so they have to be
  # installed together with the gem and not only for development.
  spec.add_runtime_dependency "pdf-reader", "~>1.3"
  spec.add_runtime_dependency "logging"
  spec.add_runtime_dependency "em-synchrony"

  spec.add_development_dependency "bundler", "~> 1.5"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~>3.0"
  # NOTE(review): tempfile ships with Ruby's standard library; confirm
  # whether this entry is needed at all.
  spec.add_development_dependency "tempfile"
end
metadata ADDED
@@ -0,0 +1,186 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tomereader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - nychka
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-09-07 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bundler
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: '1.5'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: '1.5'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rake
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: pdf-reader
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '1.3'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '1.3'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rspec
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.0'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: logging
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: em-synchrony
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: tempfile
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ description: Tomereader will help you to learn English by reading your favourites
127
+ books
128
+ email:
129
+ - nychka93@gmail.com
130
+ executables: []
131
+ extensions: []
132
+ extra_rdoc_files: []
133
+ files:
134
+ - .autotest
135
+ - .gitignore
136
+ - .rspec
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - data/content.txt
142
+ - data/divergent.txt
143
+ - data/divergent_1.pdf
144
+ - data/divergent_1.txt
145
+ - data/evented-spec.pdf
146
+ - data/stormrage.fb2
147
+ - data/three_phrases.txt
148
+ - lib/tomereader.rb
149
+ - lib/tomereader/index.rb
150
+ - lib/tomereader/logger.rb
151
+ - lib/tomereader/parser.rb
152
+ - lib/tomereader/phrase.rb
153
+ - lib/tomereader/settings.rb
154
+ - lib/tomereader/version.rb
155
+ - lib/tomereader/word.rb
156
+ - spec/spec_helper.rb
157
+ - spec/tomereader_spec.rb
158
+ - tomereader.gemspec
159
+ homepage: ''
160
+ licenses:
161
+ - MIT
162
+ post_install_message:
163
+ rdoc_options: []
164
+ require_paths:
165
+ - lib
166
+ required_ruby_version: !ruby/object:Gem::Requirement
167
+ none: false
168
+ requirements:
169
+ - - '>='
170
+ - !ruby/object:Gem::Version
171
+ version: '0'
172
+ required_rubygems_version: !ruby/object:Gem::Requirement
173
+ none: false
174
+ requirements:
175
+ - - '>='
176
+ - !ruby/object:Gem::Version
177
+ version: '0'
178
+ requirements: []
179
+ rubyforge_project:
180
+ rubygems_version: 1.8.29
181
+ signing_key:
182
+ specification_version: 3
183
+ summary: Tomereader will help you to read English books
184
+ test_files:
185
+ - spec/spec_helper.rb
186
+ - spec/tomereader_spec.rb