gutenug 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0f593323290576de24cc4acec51bc6bb40ad7b1a31af2f704e0c97d2db9c5b65
4
+ data.tar.gz: 75c17d4a9ed9d2805e9e5a3a18e1a846401f53dbbda38945102863a1699117aa
5
+ SHA512:
6
+ metadata.gz: 603ae07db6703c04b478ffb29018af65f00f1a9a09ae873ad896781d9640fc492b7a022318e4adca36301605166bb449bebe0717488deb08282e36ad52b7e7e9
7
+ data.tar.gz: 57672622de1d536db0917e7d006b466effed144275cd7000e7cc3b4a53495d4dc29b99d73e7e35cab5be53838b4be04c7bf5c6beb0a496bb9b53d2dc7bf91727
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1 @@
1
+ gutenug
@@ -0,0 +1 @@
1
+ 2.5.3
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem 'pragmatic_segmenter', '~> 0.3'
4
+
5
+ group :development do
6
+ gem "rspec", "~> 3.9"
7
+ gem "yard", "~> 0.7"
8
+ gem "rdoc", "~> 6.2"
9
+ gem "bundler", "~> 1.0"
10
+ gem "juwelier", "~> 2.4"
11
+ gem "simplecov", ">= 0"
12
+ end
@@ -0,0 +1,93 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ addressable (2.7.0)
5
+ public_suffix (>= 2.0.2, < 5.0)
6
+ builder (3.2.4)
7
+ descendants_tracker (0.0.4)
8
+ thread_safe (~> 0.3, >= 0.3.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.3.2)
11
+ faraday (1.0.1)
12
+ multipart-post (>= 1.2, < 3)
13
+ git (1.7.0)
14
+ rchardet (~> 1.8)
15
+ github_api (0.19.0)
16
+ addressable (~> 2.4)
17
+ descendants_tracker (~> 0.0.4)
18
+ faraday (>= 0.8, < 2)
19
+ hashie (~> 3.5, >= 3.5.2)
20
+ oauth2 (~> 1.0)
21
+ hashie (3.6.0)
22
+ highline (2.0.3)
23
+ juwelier (2.4.9)
24
+ builder
25
+ bundler
26
+ git
27
+ github_api
28
+ highline
29
+ kamelcase (~> 0)
30
+ nokogiri
31
+ psych
32
+ rake
33
+ rdoc
34
+ semver2
35
+ jwt (2.2.2)
36
+ kamelcase (0.0.2)
37
+ semver2 (~> 3)
38
+ mini_portile2 (2.4.0)
39
+ multi_json (1.15.0)
40
+ multi_xml (0.6.0)
41
+ multipart-post (2.1.1)
42
+ nokogiri (1.10.10)
43
+ mini_portile2 (~> 2.4.0)
44
+ oauth2 (1.4.4)
45
+ faraday (>= 0.8, < 2.0)
46
+ jwt (>= 1.0, < 3.0)
47
+ multi_json (~> 1.3)
48
+ multi_xml (~> 0.5)
49
+ rack (>= 1.2, < 3)
50
+ pragmatic_segmenter (0.3.22)
51
+ unicode
52
+ psych (3.2.0)
53
+ public_suffix (4.0.5)
54
+ rack (2.2.3)
55
+ rake (13.0.1)
56
+ rchardet (1.8.0)
57
+ rdoc (6.2.1)
58
+ rspec (3.9.0)
59
+ rspec-core (~> 3.9.0)
60
+ rspec-expectations (~> 3.9.0)
61
+ rspec-mocks (~> 3.9.0)
62
+ rspec-core (3.9.2)
63
+ rspec-support (~> 3.9.3)
64
+ rspec-expectations (3.9.2)
65
+ diff-lcs (>= 1.2.0, < 2.0)
66
+ rspec-support (~> 3.9.0)
67
+ rspec-mocks (3.9.1)
68
+ diff-lcs (>= 1.2.0, < 2.0)
69
+ rspec-support (~> 3.9.0)
70
+ rspec-support (3.9.3)
71
+ semver2 (3.4.2)
72
+ simplecov (0.19.0)
73
+ docile (~> 1.1)
74
+ simplecov-html (~> 0.11)
75
+ simplecov-html (0.12.2)
76
+ thread_safe (0.3.6)
77
+ unicode (0.4.4.4)
78
+ yard (0.9.25)
79
+
80
+ PLATFORMS
81
+ ruby
82
+
83
+ DEPENDENCIES
84
+ bundler (~> 1.0)
85
+ juwelier (~> 2.4)
86
+ pragmatic_segmenter (~> 0.3)
87
+ rdoc (~> 6.2)
88
+ rspec (~> 3.9)
89
+ simplecov
90
+ yard (~> 0.7)
91
+
92
+ BUNDLED WITH
93
+ 1.17.3
@@ -0,0 +1,5 @@
1
+ # Gutenug
2
+
3
+ A good-enough Gutenberg parser.
4
+
5
+ Copyright (c) 2020 Jason Hutchens. See [UNLICENSE](https://github.com/kranzky/gutenug/blob/master/UNLICENSE) for further details.
@@ -0,0 +1,43 @@
1
# encoding: utf-8

# Build script for the gutenug gem. Loads the Bundler environment, then
# defines the packaging (Juwelier), spec, coverage and documentation tasks.

require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  # A gem from the Gemfile is missing: explain how to fix it and exit with
  # the status code Bundler attached to the error.
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'
require 'juwelier'
Juwelier::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
  # gutenug.gemspec is generated from these values; run `rake gemspec` after
  # changing any of them.
  gem.name = "gutenug"
  gem.homepage = "http://github.com/kranzky/gutenug"
  # The only licence file shipped with the gem is UNLICENSE (a public-domain
  # dedication), so declare the matching SPDX identifier rather than "MIT".
  gem.license = "Unlicense"
  gem.summary = %Q{A good-enough Gutenberg parser}
  gem.description = %Q{Not much more than that, really. Intended for my NaNoGenMo project.}
  gem.email = "lloyd@kranzky.com"
  gem.authors = ["Lloyd Kranzky"]
  gem.required_ruby_version = '>= 2.5'

  # dependencies defined in Gemfile
end
Juwelier::RubygemsDotOrgTasks.new
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

desc "Code coverage detail"
task :simplecov do
  # Sets COVERAGE before running the specs.
  # NOTE(review): presumably a spec helper checks this to enable SimpleCov - confirm.
  ENV['COVERAGE'] = "true"
  Rake::Task['spec'].execute
end

task :default => :spec

require 'yard'
YARD::Rake::YardocTask.new
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
@@ -0,0 +1,73 @@
1
+ # Generated by juwelier
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: gutenug 0.1.1 ruby lib
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "gutenug".freeze
9
+ s.version = "0.1.1"
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib".freeze]
13
+ s.authors = ["Lloyd Kranzky".freeze]
14
+ s.date = "2020-08-22"
15
+ s.description = "Not much more than that, really. Intended for my NaNoGenMo project.".freeze
16
+ s.email = "lloyd@kranzky.com".freeze
17
+ s.extra_rdoc_files = [
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".rspec",
23
+ ".ruby-gemset",
24
+ ".ruby-version",
25
+ "Gemfile",
26
+ "Gemfile.lock",
27
+ "README.md",
28
+ "Rakefile",
29
+ "UNLICENSE",
30
+ "VERSION",
31
+ "gutenug.gemspec",
32
+ "lib/gutenug.rb",
33
+ "lib/gutenug/book.rb",
34
+ "lib/gutenug/chapter.rb",
35
+ "lib/gutenug/paragraph.rb"
36
+ ]
37
+ s.homepage = "http://github.com/kranzky/gutenug".freeze
38
+ s.licenses = ["MIT".freeze]
39
+ s.required_ruby_version = Gem::Requirement.new(">= 2.5".freeze)
40
+ s.rubygems_version = "2.7.6".freeze
41
+ s.summary = "A good-enough Gutenberg parser".freeze
42
+
43
+ if s.respond_to? :specification_version then
44
+ s.specification_version = 4
45
+
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
48
+ s.add_development_dependency(%q<rspec>.freeze, ["~> 3.9"])
49
+ s.add_development_dependency(%q<yard>.freeze, ["~> 0.7"])
50
+ s.add_development_dependency(%q<rdoc>.freeze, ["~> 6.2"])
51
+ s.add_development_dependency(%q<bundler>.freeze, ["~> 1.0"])
52
+ s.add_development_dependency(%q<juwelier>.freeze, ["~> 2.4"])
53
+ s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
54
+ else
55
+ s.add_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
56
+ s.add_dependency(%q<rspec>.freeze, ["~> 3.9"])
57
+ s.add_dependency(%q<yard>.freeze, ["~> 0.7"])
58
+ s.add_dependency(%q<rdoc>.freeze, ["~> 6.2"])
59
+ s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
60
+ s.add_dependency(%q<juwelier>.freeze, ["~> 2.4"])
61
+ s.add_dependency(%q<simplecov>.freeze, [">= 0"])
62
+ end
63
+ else
64
+ s.add_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
65
+ s.add_dependency(%q<rspec>.freeze, ["~> 3.9"])
66
+ s.add_dependency(%q<yard>.freeze, ["~> 0.7"])
67
+ s.add_dependency(%q<rdoc>.freeze, ["~> 6.2"])
68
+ s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
69
+ s.add_dependency(%q<juwelier>.freeze, ["~> 2.4"])
70
+ s.add_dependency(%q<simplecov>.freeze, [">= 0"])
71
+ end
72
+ end
73
+
@@ -0,0 +1,4 @@
1
+ require 'pragmatic_segmenter'
2
+ require 'gutenug/paragraph'
3
+ require 'gutenug/chapter'
4
+ require 'gutenug/book'
@@ -0,0 +1,45 @@
1
module Gutenug
  # A book parsed from a single string of plain text.
  #
  # The text is cut into paragraphs at blank lines. Runs of paragraphs are
  # then grouped into chapters, with every invalid paragraph acting as a
  # chapter break.
  class Book
    # @return [Array<Chapter>] the chapters found in the text, in order
    attr_reader :chapters

    # @param blob [String] the complete text of the book
    def initialize(blob)
      @chapters = []
      _build_chapters(_parse_paragraphs(blob))
    end

    # @return [String] every chapter rendered with #to_s, separated by a
    #   "* * *" divider line
    def to_s
      @chapters.map(&:to_s).join("\n\n* * *\n\n")
    end

    private

    # Splits the text into Paragraph objects, one per blank-line-delimited
    # run of (whitespace-stripped) lines.
    def _parse_paragraphs(blob)
      paragraphs = []
      buffer = []
      blob.split("\n").map(&:strip).each do |line|
        if line.empty?
          # Every blank line closes a paragraph, even when nothing has been
          # buffered; consecutive blank lines therefore yield paragraphs
          # built from an empty line list.
          paragraphs << Paragraph.new(buffer)
          # Start a fresh array instead of clearing the one just handed over.
          buffer = []
        else
          buffer << line
        end
      end
      paragraphs << Paragraph.new(buffer) unless buffer.empty?
      paragraphs
    end

    # Groups consecutive paragraphs by status and flushes the collected
    # groups as a chapter candidate each time an :invalid group is reached.
    def _build_chapters(paragraphs)
      candidates = []
      paragraphs.chunk(&:status).each do |status, run|
        if status == :invalid
          _add_chapter(candidates)
          candidates = []
        else
          candidates << [status, run]
        end
      end
      _add_chapter(candidates)
    end

    # Appends a chapter built from the candidate [status, paragraphs] pairs.
    # Nothing is added unless at least one group is :valid and the groups
    # hold more than one paragraph in total.
    def _add_chapter(candidates)
      return unless candidates.any? { |status, _run| status == :valid }
      # Plain concatenation: every paragraph is kept, in order.
      paragraphs = candidates.flat_map { |_status, run| run }
      return unless paragraphs.length > 1
      @chapters << Chapter.new(paragraphs)
    end
  end
end
@@ -0,0 +1,15 @@
1
module Gutenug
  # An ordered list of paragraphs that together form one chapter.
  class Chapter
    # @return [Array] the paragraphs given to the constructor, in order
    attr_reader :paragraphs

    # @param paragraphs [Array] the chapter's paragraphs; each is rendered
    #   with #to_s
    def initialize(paragraphs)
      @paragraphs = paragraphs
    end

    # @return [String] the paragraphs rendered with #to_s and separated by
    #   a blank line
    def to_s
      @paragraphs.map(&:to_s).join("\n\n")
    end
  end
end
@@ -0,0 +1,52 @@
1
module Gutenug
  # A paragraph of text, segmented into sentences and classified as
  # :valid, :suspect or :invalid.
  class Paragraph
    # Terminal punctuation, optionally followed by closing quotes, brackets
    # or an underscore, at the end of a line.
    SENTENCE_END = /[?!.]+["'_)\]]*$/.freeze
    # A trailing double hyphen.
    TRAILING_DASHES = /[-][-]$/.freeze
    private_constant :SENTENCE_END, :TRAILING_DASHES

    # @return [Array<String>] the sentences produced by the segmenter
    attr_reader :sentences

    # @param blob [Array<String>] the lines of the paragraph; they are
    #   joined with single spaces before being segmented
    def initialize(blob)
      segmenter = PragmaticSegmenter::Segmenter.new(text: blob.join(' '))
      @sentences = segmenter.segment
      _validate
    end

    # @return [String] the sentences joined with single spaces
    def to_s
      @sentences.join(" ")
    end

    # Clears the invalid flag.
    # NOTE(review): the suspect flag is left as computed at construction, so
    # a paragraph that failed validation reports :suspect (not :valid)
    # afterwards - confirm this is intended.
    def valid!
      @invalid = false
    end

    # Sets the invalid flag.
    def invalid!
      @invalid = true
    end

    # @return [Boolean] whether the paragraph is currently flagged invalid
    def invalid?
      @invalid
    end

    # @return [Boolean] whether the paragraph was flagged suspect when built
    def suspect?
      @suspect
    end

    # @return [Symbol] :invalid, :suspect or :valid; invalid takes precedence
    def status
      return :invalid if invalid?
      return :suspect if suspect?
      :valid
    end

    private

    # Computes both flags. A paragraph is invalid when it has no sentences
    # or is a lone unterminated fragment; it is suspect when it is invalid
    # or when no sentence contains a lowercase ASCII letter.
    def _validate
      @invalid = @sentences.empty? || _fragment?
      @suspect = @invalid || @sentences.none? { |sentence| sentence.match?(/[a-z]/) }
    end

    # True for a single sentence that ends neither in terminal punctuation
    # nor in a double hyphen.
    def _fragment?
      return false unless @sentences.length == 1
      only = @sentences.first
      !only.match?(SENTENCE_END) && !only.match?(TRAILING_DASHES)
    end
  end
end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gutenug
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Lloyd Kranzky
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-08-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pragmatic_segmenter
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rdoc
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '6.2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '6.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: juwelier
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.4'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.4'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Not much more than that, really. Intended for my NaNoGenMo project.
112
+ email: lloyd@kranzky.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files:
116
+ - README.md
117
+ files:
118
+ - ".document"
119
+ - ".rspec"
120
+ - ".ruby-gemset"
121
+ - ".ruby-version"
122
+ - Gemfile
123
+ - Gemfile.lock
124
+ - README.md
125
+ - Rakefile
126
+ - UNLICENSE
127
+ - VERSION
128
+ - gutenug.gemspec
129
+ - lib/gutenug.rb
130
+ - lib/gutenug/book.rb
131
+ - lib/gutenug/chapter.rb
132
+ - lib/gutenug/paragraph.rb
133
+ homepage: http://github.com/kranzky/gutenug
134
+ licenses:
135
+ - MIT
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '2.5'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.7.6
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: A good-enough Gutenberg parser
157
+ test_files: []