yada 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +36 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/examples/kafka.rb +18 -0
- data/examples/kafka_by_character.rb +18 -0
- data/lib/yada.rb +49 -0
- data/lib/yada/input.rb +25 -0
- data/lib/yada/markov.rb +54 -0
- data/lib/yada/version.rb +3 -0
- data/yada.gemspec +33 -0
- metadata +104 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e42d8f423409b299a700cbcc01d640a236f0631c
|
4
|
+
data.tar.gz: 487338b93ac7814aaaa79a5da135df25873bd1a4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: aa7ac877afca14344129d7ed0913d851f34a3b0b31ff9889bc8548c4ac724a8e082d19606d516d797f60c4c2e6afd1169692c7cb5b017b863d2d9fa9e122dd2e
|
7
|
+
data.tar.gz: 1f4b56ad0e1dfadcd9d8c16580f18d0fdd63596e2db28c33db2363bbbe2159ddcdc7e26d0bdfc79f6ef8b7420aa39dc464148e2ded3d6b1f241e6313123aefc7
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2016 Luca Ongaro
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# Yada
|
2
|
+
|
3
|
+
`Yada` trains a Markov chain model from some provided textual content, and uses
|
4
|
+
it to generate more text that resembles the style of the original (but is mostly
|
5
|
+
nonsense).
|
6
|
+
|
7
|
+
|
8
|
+
## Usage
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
yada = Yada.new
|
12
|
+
yada.train!(File.new('a_text_file.txt'))
|
13
|
+
yada.generate(10) # Generate 10 sentences
|
14
|
+
```
|
15
|
+
|
16
|
+
Example output, when trained on Franz Kafka's Metamorphosis:
|
17
|
+
|
18
|
+
```
|
19
|
+
I'd get kicked out on the spot. He was still hurriedly thinking all this
|
20
|
+
through, unable to decide to get out of the question - but this deterioration in
|
21
|
+
his condition was fully (in his opinion) made up for by the door to Gregor's
|
22
|
+
room and found herself face to face with him. It showed a lady fitted out with a
|
23
|
+
fur hat and fur boa who sat upright, raising a heavy fur muff that covered the
|
24
|
+
whole of her lower arm towards the viewer. she could see the dirt as well as he
|
25
|
+
could but she had simply decided to leave him to it. And you're also well aware
|
26
|
+
that we travellers spend almost the whole year away from the office, so that we
|
27
|
+
can very easily fall victim to gossip and chance and groundless complaints, and
|
28
|
+
it's almost impossible to defend yourself from that sort of thing, we don't
|
29
|
+
usually even hear about them, or if at all it's when we arrive back home
|
30
|
+
exhausted from a trip, and that's when we feel the harmful effects of what's
|
31
|
+
been going on without even knowing what caused them.
|
32
|
+
```
|
33
|
+
|
34
|
+
## License
|
35
|
+
|
36
|
+
The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "yada"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/examples/kafka.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
require 'yada'
require 'open-uri'

# Open The Metamorphosis by Franz Kafka, train a Markov model on it, and use it
# to generate text:
URI.parse('http://www.gutenberg.org/cache/epub/5200/pg5200.txt').open do |text|
  # Keep only the lines from the opening sentence of the story up to (but not
  # including) the Project Gutenberg closing notice.
  text = text.drop_while { |line|
    !line.start_with?('One morning, when Gregor Samsa')
  }.take_while { |line|
    !line.start_with?('End of the Project Gutenberg EBook')
  }

  # The option key is :ngram; a misspelled key such as :ngrams is merged into
  # the options hash but never read, so it would be silently ignored.
  yada = Yada.new(ngram: 1)
  yada.train!(text)

  # Print five generated sentences, concatenated.
  puts yada.generate(5).join
end
|
data/examples/kafka_by_character.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
require 'yada'
require 'open-uri'

# Fetch The Metamorphosis by Franz Kafka, train a Markov model on it that
# tokenizes character by character (four tokens of context), and print text
# generated from that model.
book = URI.parse('http://www.gutenberg.org/cache/epub/5200/pg5200.txt')

book.open do |io|
  first_line = 'One morning, when Gregor Samsa'
  closing_notice = 'End of the Project Gutenberg EBook'

  # Drop everything before the story starts, then cut at the closing notice.
  from_story_start = io.drop_while { |line| !line.start_with?(first_line) }
  story = from_story_start.take_while { |line| !line.start_with?(closing_notice) }

  yada = Yada.new(ngram: 4, tokenize: /./, join: '')
  yada.train!(story)

  puts yada.generate(50).join
end
|
data/lib/yada.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require "yada/version"
|
2
|
+
require "yada/input"
|
3
|
+
require "yada/markov"
|
4
|
+
|
5
|
+
# Nonsensical text generator: trains a Markov model on sentences read from a
# source and samples new sentences from it.
#
# Options (merged over DEFAULT_OPTIONS):
#   :ngram       - length of the context passed to the Markov model; also the
#                  number of START markers every generated sentence begins from
#   :tokenize    - tokenizer pattern handed to the Markov model
#   :join        - string placed before every token that is not punctuation
#   :punctuation - a token matching this pattern is appended without :join
#   :stop        - sentence boundary pattern handed to Input
class Yada
  DEFAULT_OPTIONS = {
    :ngram => 1,
    :tokenize => /[\w\-\/]+| ?[^\s]+/,
    :join => ' ',
    :punctuation => /[^\w\-\/]+/,
    :stop => /[\.\?!;]\s+/
  }.freeze

  # options - Hash overriding any of DEFAULT_OPTIONS.
  def initialize(options = {})
    @options = DEFAULT_OPTIONS.merge(options)
    @markov = Markov.new(@options[:ngram], @options[:tokenize], @options[:join])
  end

  # Trains the model on +data+, which is wrapped in an Input together with the
  # :stop pattern. Always returns true.
  def train!(data)
    input = Input.new(data, @options[:stop])
    @markov.train!(input)
    true
  end

  # Generates +n+ sentences and returns them as an Array of Strings.
  #
  # Every non-punctuation token is preceded by the :join string, so with the
  # default options a sentence starts with a space; joining the returned
  # sentences with '' therefore still keeps them separated.
  def generate(n = 1)
    start = [Markov::START] * @options[:ngram]
    (1..n).map do
      generate_tokens(start).reduce('') do |sentence, token|
        if token.match(@options[:punctuation])
          sentence + token
        else
          sentence + @options[:join] + token
        end
      end
    end
  end

  # Samples tokens starting from the context +ngram+ until the model emits
  # Markov::STOP, and returns +tokens+ extended with the sampled tokens.
  #
  # Implemented as a loop rather than recursion so that long sentences cannot
  # exhaust the stack. If no token can be selected for the current context
  # (e.g. the model has not been trained), the sentence simply ends instead of
  # looping forever.
  private def generate_tokens(ngram, tokens = [])
    generated = tokens.dup
    context = ngram

    while (next_token = pick_token(context)) && next_token != Markov::STOP
      generated << next_token
      context = context.drop(1) + [next_token]
    end

    generated
  end

  # Picks the next token for +context+ by walking the cumulative transition
  # probabilities until they exceed a random draw. Returns nil when no token
  # is selected.
  private def pick_token(context)
    random = rand
    # Start from the Integer 0 so the Rational probabilities are summed
    # exactly; a Float accumulator could end up marginally below 1.0 and miss
    # every token for a draw close to 1.
    cumulative = 0
    @markov.tokens.find do |token|
      cumulative += @markov.transition_probability(context, token)
      cumulative > random
    end
  end
end
|
data/lib/yada/input.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
class Yada
  # Enumerable view over a source of text chunks (anything responding to
  # #each and yielding Strings, such as an open File or an Array of lines)
  # that re-groups the text into sentences delimited by a stop pattern.
  class Input
    include Enumerable

    # source - object whose #each yields Strings.
    # stop   - pattern marking the end of a sentence; the matched text is kept
    #          at the end of the sentence it terminates.
    def initialize(source, stop = /\.\s+/)
      @source, @stop = source, stop
    end

    # Yields every sentence found in the source. Text that is not terminated
    # by a stop match is carried over to the next chunk, and whatever remains
    # after the last chunk is yielded as a final sentence.
    #
    # Returns nil, or an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block_given?

      accumulator = ''
      @source.each do |line|
        accumulator = regroup(accumulator, line, &block)
      end
      yield accumulator if accumulator != ''
      nil
    end

    # Yields each complete sentence in +text+ (the first one prefixed with the
    # pending +accumulator+) and returns the unterminated remainder.
    #
    # Iterative on purpose: a single chunk containing thousands of sentences
    # must not consume one stack frame per sentence.
    private def regroup(accumulator, text, &block)
      pre, stop, rest = text.partition(@stop)
      until stop == ''
        yield accumulator + pre + stop
        accumulator = ''
        pre, stop, rest = rest.partition(@stop)
      end
      accumulator + pre
    end
  end
end
|
data/lib/yada/markov.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
class Yada
  # Markov model over token n-grams: counts, for every context of +n+
  # consecutive tokens seen during training, how often each token follows it.
  class Markov
    # Marker padded +n+ times in front of every training text.
    START = :__start
    # Marker appended after every training text.
    STOP = :__stop

    # Set of every token observed as a successor during training (includes
    # STOP once anything has been trained).
    attr_reader :tokens

    # n        - number of tokens in a context.
    # tokenize - pattern passed to String#scan to split text into tokens.
    # join     - accepted for interface compatibility; contexts are keyed by
    #            their token sequence, not by a joined string.
    def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
      @n, @tokenize, @join = n, tokenize, join
      @transition_count = Hash.new(0)
      @ngram_count = Hash.new(0)
      @tokens = Set.new
    end

    # Updates the counts from +data+, an object whose #each yields Strings.
    # Each text is tokenized, wrapped in START/STOP markers and walked with a
    # sliding window of n + 1 tokens (context plus successor).
    def train!(data)
      prefix = [START] * @n
      suffix = [STOP]

      data.each do |text|
        Join.new(prefix, text.scan(@tokenize), suffix).each_cons(@n + 1) do |*ngram, token|
          # Key by the token array itself: joining the tokens into one string
          # would make distinct contexts such as ["ab", "c"] and ["a", "bc"]
          # indistinguishable when the join string is empty.
          @tokens.add(token)
          @transition_count[[ngram, token]] += 1
          @ngram_count[ngram] += 1
        end
      end
    end

    # Returns the observed probability that +token+ follows the context
    # +ngram+ (an Array of tokens) as a Rational, or 0 when the context was
    # never seen.
    def transition_probability(ngram, token)
      seen = @ngram_count[ngram]
      return 0 if seen == 0
      Rational(@transition_count[[ngram, token]], seen)
    end

    # Enumerable that yields the elements of several enumerables one after
    # another, without building an intermediate collection.
    class Join
      include Enumerable

      def initialize(*enumerables)
        @enumerables = enumerables
      end

      # Yields every element of every wrapped enumerable, in order. Returns
      # nil, or an Enumerator when called without a block.
      def each
        return enum_for(:each) unless block_given?

        @enumerables.each do |enumerable|
          enumerable.each do |element|
            yield element
          end
        end
        nil
      end
    end
  end
end
|
data/lib/yada/version.rb
ADDED
data/yada.gemspec
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'yada/version'

# Gem specification for yada.
Gem::Specification.new do |spec|
  spec.name = "yada"
  spec.version = Yada::VERSION
  spec.authors = ["Luca Ongaro"]
  spec.email = ["lukeongaro@gmail.com"]

  spec.summary = 'Nonsensical text generator'
  spec.description = 'Train a Markov Chain model on textual content and use it to generate nonsensical text in that style'
  spec.homepage = 'https://github.com/lucaong/yada'
  spec.license = "MIT"

  # Refuse to build when the push host cannot be restricted via metadata.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end
  spec.metadata['allowed_push_host'] = "https://rubygems.org"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.13"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yada
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Luca Ongaro
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-11-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.13'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.13'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: Train a Markov Chain model on textual content and use it to generate
|
56
|
+
nonsensical text in that style
|
57
|
+
email:
|
58
|
+
- lukeongaro@gmail.com
|
59
|
+
executables: []
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".rspec"
|
65
|
+
- ".travis.yml"
|
66
|
+
- Gemfile
|
67
|
+
- LICENSE.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- bin/console
|
71
|
+
- bin/setup
|
72
|
+
- examples/kafka.rb
|
73
|
+
- examples/kafka_by_character.rb
|
74
|
+
- lib/yada.rb
|
75
|
+
- lib/yada/input.rb
|
76
|
+
- lib/yada/markov.rb
|
77
|
+
- lib/yada/version.rb
|
78
|
+
- yada.gemspec
|
79
|
+
homepage: https://github.com/lucaong/yada
|
80
|
+
licenses:
|
81
|
+
- MIT
|
82
|
+
metadata:
|
83
|
+
allowed_push_host: https://rubygems.org
|
84
|
+
post_install_message:
|
85
|
+
rdoc_options: []
|
86
|
+
require_paths:
|
87
|
+
- lib
|
88
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
requirements:
|
95
|
+
- - ">="
|
96
|
+
- !ruby/object:Gem::Version
|
97
|
+
version: '0'
|
98
|
+
requirements: []
|
99
|
+
rubyforge_project:
|
100
|
+
rubygems_version: 2.5.1
|
101
|
+
signing_key:
|
102
|
+
specification_version: 4
|
103
|
+
summary: Nonsensical text generator
|
104
|
+
test_files: []
|