gutenug 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0f593323290576de24cc4acec51bc6bb40ad7b1a31af2f704e0c97d2db9c5b65
4
+ data.tar.gz: 75c17d4a9ed9d2805e9e5a3a18e1a846401f53dbbda38945102863a1699117aa
5
+ SHA512:
6
+ metadata.gz: 603ae07db6703c04b478ffb29018af65f00f1a9a09ae873ad896781d9640fc492b7a022318e4adca36301605166bb449bebe0717488deb08282e36ad52b7e7e9
7
+ data.tar.gz: 57672622de1d536db0917e7d006b466effed144275cd7000e7cc3b4a53495d4dc29b99d73e7e35cab5be53838b4be04c7bf5c6beb0a496bb9b53d2dc7bf91727
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ UNLICENSE
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1 @@
1
+ gutenug
@@ -0,0 +1 @@
1
+ 2.5.3
data/Gemfile ADDED
@@ -0,0 +1,12 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem 'pragmatic_segmenter', '~> 0.3'
4
+
5
+ group :development do
6
+ gem "rspec", "~> 3.9"
7
+ gem "yard", "~> 0.7"
8
+ gem "rdoc", "~> 6.2"
9
+ gem "bundler", "~> 1.0"
10
+ gem "juwelier", "~> 2.4"
11
+ gem "simplecov", ">= 0"
12
+ end
@@ -0,0 +1,93 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ addressable (2.7.0)
5
+ public_suffix (>= 2.0.2, < 5.0)
6
+ builder (3.2.4)
7
+ descendants_tracker (0.0.4)
8
+ thread_safe (~> 0.3, >= 0.3.1)
9
+ diff-lcs (1.4.4)
10
+ docile (1.3.2)
11
+ faraday (1.0.1)
12
+ multipart-post (>= 1.2, < 3)
13
+ git (1.7.0)
14
+ rchardet (~> 1.8)
15
+ github_api (0.19.0)
16
+ addressable (~> 2.4)
17
+ descendants_tracker (~> 0.0.4)
18
+ faraday (>= 0.8, < 2)
19
+ hashie (~> 3.5, >= 3.5.2)
20
+ oauth2 (~> 1.0)
21
+ hashie (3.6.0)
22
+ highline (2.0.3)
23
+ juwelier (2.4.9)
24
+ builder
25
+ bundler
26
+ git
27
+ github_api
28
+ highline
29
+ kamelcase (~> 0)
30
+ nokogiri
31
+ psych
32
+ rake
33
+ rdoc
34
+ semver2
35
+ jwt (2.2.2)
36
+ kamelcase (0.0.2)
37
+ semver2 (~> 3)
38
+ mini_portile2 (2.4.0)
39
+ multi_json (1.15.0)
40
+ multi_xml (0.6.0)
41
+ multipart-post (2.1.1)
42
+ nokogiri (1.10.10)
43
+ mini_portile2 (~> 2.4.0)
44
+ oauth2 (1.4.4)
45
+ faraday (>= 0.8, < 2.0)
46
+ jwt (>= 1.0, < 3.0)
47
+ multi_json (~> 1.3)
48
+ multi_xml (~> 0.5)
49
+ rack (>= 1.2, < 3)
50
+ pragmatic_segmenter (0.3.22)
51
+ unicode
52
+ psych (3.2.0)
53
+ public_suffix (4.0.5)
54
+ rack (2.2.3)
55
+ rake (13.0.1)
56
+ rchardet (1.8.0)
57
+ rdoc (6.2.1)
58
+ rspec (3.9.0)
59
+ rspec-core (~> 3.9.0)
60
+ rspec-expectations (~> 3.9.0)
61
+ rspec-mocks (~> 3.9.0)
62
+ rspec-core (3.9.2)
63
+ rspec-support (~> 3.9.3)
64
+ rspec-expectations (3.9.2)
65
+ diff-lcs (>= 1.2.0, < 2.0)
66
+ rspec-support (~> 3.9.0)
67
+ rspec-mocks (3.9.1)
68
+ diff-lcs (>= 1.2.0, < 2.0)
69
+ rspec-support (~> 3.9.0)
70
+ rspec-support (3.9.3)
71
+ semver2 (3.4.2)
72
+ simplecov (0.19.0)
73
+ docile (~> 1.1)
74
+ simplecov-html (~> 0.11)
75
+ simplecov-html (0.12.2)
76
+ thread_safe (0.3.6)
77
+ unicode (0.4.4.4)
78
+ yard (0.9.25)
79
+
80
+ PLATFORMS
81
+ ruby
82
+
83
+ DEPENDENCIES
84
+ bundler (~> 1.0)
85
+ juwelier (~> 2.4)
86
+ pragmatic_segmenter (~> 0.3)
87
+ rdoc (~> 6.2)
88
+ rspec (~> 3.9)
89
+ simplecov
90
+ yard (~> 0.7)
91
+
92
+ BUNDLED WITH
93
+ 1.17.3
@@ -0,0 +1,5 @@
1
+ # Gutenug
2
+
3
+ A good-enough Gutenberg parser.
4
+
5
+ Copyright (c) 2020 Jason Hutchens. See [UNLICENSE](UNLICENSE) for further details.
@@ -0,0 +1,43 @@
1
# encoding: utf-8

# Rake tasks for the gutenug gem: packaging (via Juwelier), specs,
# coverage and YARD documentation.

require 'rubygems'
require 'bundler'

# Activate the default and development gem groups before anything else is
# required; abort with Bundler's own status code when gems are missing.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end

require 'rake'
require 'juwelier'

Juwelier::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
  gem.name = "gutenug"
  gem.homepage = "http://github.com/kranzky/gutenug"
  # SPDX identifier for the UNLICENSE file shipped with the gem (the
  # previous value, "MIT", did not match the bundled licence text).
  gem.license = "Unlicense"
  gem.summary = %Q{A good-enough Gutenberg parser}
  gem.description = %Q{Not much more than that, really. Intended for my NaNoGenMo project.}
  gem.email = "lloyd@kranzky.com"
  gem.authors = ["Lloyd Kranzky"]
  gem.required_ruby_version = '>= 2.5'

  # dependencies defined in Gemfile
end
Juwelier::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every file matching spec/**/*_spec.rb.
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

desc "Code coverage detail"
task :simplecov do
  # Sets COVERAGE before running the specs; presumably the spec helper
  # reads it to start SimpleCov (the helper is not part of this file).
  ENV['COVERAGE'] = "true"
  Rake::Task['spec'].execute
end

task :default => :spec

require 'yard'
YARD::Rake::YardocTask.new
@@ -0,0 +1,24 @@
1
+ This is free and unencumbered software released into the public domain.
2
+
3
+ Anyone is free to copy, modify, publish, use, compile, sell, or
4
+ distribute this software, either in source code form or as a compiled
5
+ binary, for any purpose, commercial or non-commercial, and by any
6
+ means.
7
+
8
+ In jurisdictions that recognize copyright laws, the author or authors
9
+ of this software dedicate any and all copyright interest in the
10
+ software to the public domain. We make this dedication for the benefit
11
+ of the public at large and to the detriment of our heirs and
12
+ successors. We intend this dedication to be an overt act of
13
+ relinquishment in perpetuity of all present and future rights to this
14
+ software under copyright law.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20
+ OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21
+ ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ For more information, please refer to <http://unlicense.org/>
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.1
@@ -0,0 +1,73 @@
1
+ # Generated by juwelier
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Juwelier::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+ # stub: gutenug 0.1.1 ruby lib
6
+
7
+ Gem::Specification.new do |s|
8
+ s.name = "gutenug".freeze
9
+ s.version = "0.1.1"
10
+
11
+ s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib".freeze]
13
+ s.authors = ["Lloyd Kranzky".freeze]
14
+ s.date = "2020-08-22"
15
+ s.description = "Not much more than that, really. Intended for my NaNoGenMo project.".freeze
16
+ s.email = "lloyd@kranzky.com".freeze
17
+ s.extra_rdoc_files = [
18
+ "README.md"
19
+ ]
20
+ s.files = [
21
+ ".document",
22
+ ".rspec",
23
+ ".ruby-gemset",
24
+ ".ruby-version",
25
+ "Gemfile",
26
+ "Gemfile.lock",
27
+ "README.md",
28
+ "Rakefile",
29
+ "UNLICENSE",
30
+ "VERSION",
31
+ "gutenug.gemspec",
32
+ "lib/gutenug.rb",
33
+ "lib/gutenug/book.rb",
34
+ "lib/gutenug/chapter.rb",
35
+ "lib/gutenug/paragraph.rb"
36
+ ]
37
+ s.homepage = "http://github.com/kranzky/gutenug".freeze
38
+ s.licenses = ["MIT".freeze]
39
+ s.required_ruby_version = Gem::Requirement.new(">= 2.5".freeze)
40
+ s.rubygems_version = "2.7.6".freeze
41
+ s.summary = "A good-enough Gutenberg parser".freeze
42
+
43
+ if s.respond_to? :specification_version then
44
+ s.specification_version = 4
45
+
46
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
48
+ s.add_development_dependency(%q<rspec>.freeze, ["~> 3.9"])
49
+ s.add_development_dependency(%q<yard>.freeze, ["~> 0.7"])
50
+ s.add_development_dependency(%q<rdoc>.freeze, ["~> 6.2"])
51
+ s.add_development_dependency(%q<bundler>.freeze, ["~> 1.0"])
52
+ s.add_development_dependency(%q<juwelier>.freeze, ["~> 2.4"])
53
+ s.add_development_dependency(%q<simplecov>.freeze, [">= 0"])
54
+ else
55
+ s.add_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
56
+ s.add_dependency(%q<rspec>.freeze, ["~> 3.9"])
57
+ s.add_dependency(%q<yard>.freeze, ["~> 0.7"])
58
+ s.add_dependency(%q<rdoc>.freeze, ["~> 6.2"])
59
+ s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
60
+ s.add_dependency(%q<juwelier>.freeze, ["~> 2.4"])
61
+ s.add_dependency(%q<simplecov>.freeze, [">= 0"])
62
+ end
63
+ else
64
+ s.add_dependency(%q<pragmatic_segmenter>.freeze, ["~> 0.3"])
65
+ s.add_dependency(%q<rspec>.freeze, ["~> 3.9"])
66
+ s.add_dependency(%q<yard>.freeze, ["~> 0.7"])
67
+ s.add_dependency(%q<rdoc>.freeze, ["~> 6.2"])
68
+ s.add_dependency(%q<bundler>.freeze, ["~> 1.0"])
69
+ s.add_dependency(%q<juwelier>.freeze, ["~> 2.4"])
70
+ s.add_dependency(%q<simplecov>.freeze, [">= 0"])
71
+ end
72
+ end
73
+
@@ -0,0 +1,4 @@
1
+ require 'pragmatic_segmenter'
2
+ require 'gutenug/paragraph'
3
+ require 'gutenug/chapter'
4
+ require 'gutenug/book'
@@ -0,0 +1,45 @@
1
module Gutenug
  # A book parsed from a plain-text blob.
  #
  # The text is split into paragraphs on blank lines, and runs of
  # paragraphs are grouped into chapters. Any paragraph whose status is
  # +:invalid+ ends the current run; a blank line that follows another
  # blank line yields an empty paragraph, which Paragraph reports as
  # invalid, so consecutive blank lines also act as a chapter break.
  class Book
    # Text placed between chapters by {#to_s}.
    CHAPTER_SEPARATOR = "\n\n* * *\n\n".freeze

    # @return [Array<Chapter>] the chapters found in the text, in order
    attr_reader :chapters

    # @param blob [String] the full text of the book
    def initialize(blob)
      @chapters = []
      _build_chapters(_parse_paragraphs(blob))
    end

    # @return [String] all chapters joined by a "* * *" separator line
    def to_s
      @chapters.map(&:to_s).join(CHAPTER_SEPARATOR)
    end

    private

    # Splits the text into paragraphs: consecutive non-blank lines
    # (whitespace-stripped) form one paragraph, every blank line closes
    # the paragraph collected so far (possibly an empty one).
    def _parse_paragraphs(blob)
      paragraphs = []
      buffer = []
      blob.split("\n").map(&:strip).each do |line|
        if line.empty?
          paragraphs << Paragraph.new(buffer)
          # Start a fresh array instead of clearing the one just handed
          # over, so the paragraph never shares state with later lines.
          buffer = []
        else
          buffer << line
        end
      end
      paragraphs << Paragraph.new(buffer) unless buffer.empty?
      paragraphs
    end

    # Walks the paragraphs as runs of equal status and turns every
    # stretch between invalid runs into a chapter candidate.
    def _build_chapters(paragraphs)
      candidates = []
      paragraphs.chunk(&:status).each do |status, run|
        if status == :invalid
          _add_chapter(candidates)
          candidates = []
        else
          candidates << [status, run]
        end
      end
      _add_chapter(candidates)
    end

    # Appends a chapter built from +candidates+ (an array of
    # [status, paragraphs] pairs) when it contains at least one valid
    # run and more than one paragraph in total.
    def _add_chapter(candidates)
      return unless candidates.any? { |status, _run| status == :valid }
      paragraphs = candidates.flat_map(&:last)
      return unless paragraphs.length > 1
      @chapters << Chapter.new(paragraphs)
    end
  end
end
@@ -0,0 +1,15 @@
1
module Gutenug
  # An ordered group of paragraphs forming one chapter of a book.
  class Chapter
    # @return [Array] the paragraphs of this chapter, in order
    attr_reader :paragraphs

    # @param paragraphs [Array] the paragraphs making up the chapter;
    #   each element only needs to respond to +to_s+
    def initialize(paragraphs)
      @paragraphs = paragraphs
    end

    # @return [String] the paragraphs rendered as text, separated by a
    #   blank line
    def to_s
      @paragraphs.map(&:to_s).join("\n\n")
    end
  end
end
@@ -0,0 +1,52 @@
1
module Gutenug
  # A paragraph of text, segmented into sentences and classified as
  # +:valid+, +:suspect+ or +:invalid+.
  #
  # * invalid - no sentences at all, or a single sentence that neither
  #   ends in terminal punctuation (optionally followed by closing
  #   quotes/brackets/underscore) nor in a double dash;
  # * suspect - invalid, or no sentence contains a lowercase ASCII letter;
  # * valid   - everything else.
  class Paragraph
    # Sentence ends with ?, ! or . optionally followed by closers.
    TERMINATED = /[?!.]+["'_)\]]*$/
    # Sentence ends with a double dash.
    DANGLING_DASH = /[-][-]$/
    # At least one lowercase ASCII letter.
    LOWERCASE = /[a-z]/

    # @return [Array<String>] the sentences produced by the segmenter
    attr_reader :sentences

    # @param blob [Array<String>, String] the lines of the paragraph
    #   (joined with single spaces before segmentation); a bare String
    #   is accepted as a one-line paragraph
    def initialize(blob)
      segmenter = PragmaticSegmenter::Segmenter.new(text: Array(blob).join(' '))
      @sentences = segmenter.segment
      _validate
    end

    # @return [String] the sentences joined by single spaces
    def to_s
      @sentences.join(" ")
    end

    # Manually clears the invalid flag. The paragraph may still be
    # reported as suspect when it contains no lowercase letters.
    def valid!
      @invalid = false
    end

    # Manually marks the paragraph as invalid.
    def invalid!
      @invalid = true
    end

    # @return [Boolean] whether the paragraph is flagged invalid
    def invalid?
      @invalid
    end

    # Derived rather than stored, so it never goes stale after
    # {#valid!} or {#invalid!}: an invalid paragraph is always suspect.
    #
    # @return [Boolean] whether the paragraph is invalid or has no
    #   lowercase letters
    def suspect?
      @invalid || @uppercase_only
    end

    # @return [Symbol] +:invalid+, +:suspect+ or +:valid+
    def status
      if invalid?
        :invalid
      elsif suspect?
        :suspect
      else
        :valid
      end
    end

    private

    # Computes the initial invalid flag and the uppercase-only heuristic
    # from the segmented sentences.
    def _validate
      @invalid = @sentences.empty? || _fragment?
      @uppercase_only = @sentences.none? { |sentence| LOWERCASE.match?(sentence) }
    end

    # A lone sentence without terminal punctuation or a trailing double
    # dash is treated as a fragment rather than body text.
    def _fragment?
      return false unless @sentences.length == 1
      only = @sentences.first
      !TERMINATED.match?(only) && !DANGLING_DASH.match?(only)
    end
  end
end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gutenug
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.1
5
+ platform: ruby
6
+ authors:
7
+ - Lloyd Kranzky
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-08-22 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: pragmatic_segmenter
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.3'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.9'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.9'
41
+ - !ruby/object:Gem::Dependency
42
+ name: yard
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '0.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rdoc
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '6.2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '6.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: juwelier
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '2.4'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '2.4'
97
+ - !ruby/object:Gem::Dependency
98
+ name: simplecov
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ description: Not much more than that, really. Intended for my NaNoGenMo project.
112
+ email: lloyd@kranzky.com
113
+ executables: []
114
+ extensions: []
115
+ extra_rdoc_files:
116
+ - README.md
117
+ files:
118
+ - ".document"
119
+ - ".rspec"
120
+ - ".ruby-gemset"
121
+ - ".ruby-version"
122
+ - Gemfile
123
+ - Gemfile.lock
124
+ - README.md
125
+ - Rakefile
126
+ - UNLICENSE
127
+ - VERSION
128
+ - gutenug.gemspec
129
+ - lib/gutenug.rb
130
+ - lib/gutenug/book.rb
131
+ - lib/gutenug/chapter.rb
132
+ - lib/gutenug/paragraph.rb
133
+ homepage: http://github.com/kranzky/gutenug
134
+ licenses:
135
+ - MIT
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '2.5'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.7.6
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: A good-enough Gutenberg parser
157
+ test_files: []