yada 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e42d8f423409b299a700cbcc01d640a236f0631c
4
+ data.tar.gz: 487338b93ac7814aaaa79a5da135df25873bd1a4
5
+ SHA512:
6
+ metadata.gz: aa7ac877afca14344129d7ed0913d851f34a3b0b31ff9889bc8548c4ac724a8e082d19606d516d797f60c4c2e6afd1169692c7cb5b017b863d2d9fa9e122dd2e
7
+ data.tar.gz: 1f4b56ad0e1dfadcd9d8c16580f18d0fdd63596e2db28c33db2363bbbe2159ddcdc7e26d0bdfc79f6ef8b7420aa39dc464148e2ded3d6b1f241e6313123aefc7
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.13.6
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in yada.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 Luca Ongaro
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,36 @@
1
+ # Yada
2
+
3
+ `Yada` trains a Markov chain model on the textual content you provide, and uses
4
+ it to generate more text that resembles the original in style (but is mostly
5
+ nonsense).
6
+
7
+
8
+ ## Usage
9
+
10
+ ```ruby
11
+ yada = Yada.new
12
+ yada.train!(File.new('a_text_file.txt'))
13
+ yada.generate(10) # Generate 10 sentences
14
+ ```
15
+
16
+ Example output, when trained on Franz Kafka's Metamorphosis:
17
+
18
+ ```
19
+ I'd get kicked out on the spot. He was still hurriedly thinking all this
20
+ through, unable to decide to get out of the question - but this deterioration in
21
+ his condition was fully (in his opinion) made up for by the door to Gregor's
22
+ room and found herself face to face with him. It showed a lady fitted out with a
23
+ fur hat and fur boa who sat upright, raising a heavy fur muff that covered the
24
+ whole of her lower arm towards the viewer. she could see the dirt as well as he
25
+ could but she had simply decided to leave him to it. And you're also well aware
26
+ that we travellers spend almost the whole year away from the office, so that we
27
+ can very easily fall victim to gossip and chance and groundless complaints, and
28
+ it's almost impossible to defend yourself from that sort of thing, we don't
29
+ usually even hear about them, or if at all it's when we arrive back home
30
+ exhausted from a trip, and that's when we feel the harmful effects of what's
31
+ been going on without even knowing what caused them.
32
+ ```
33
+
34
+ ## License
35
+
36
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "yada"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,18 @@
1
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
require 'yada'
require 'open-uri'

# Open The Metamorphosis by Franz Kafka, train a Markov model on it, and use it
# to generate text:
URI.parse('http://www.gutenberg.org/cache/epub/5200/pg5200.txt').open do |text|
  # Keep only the lines from the opening sentence of the novel up to (but not
  # including) the Project Gutenberg end-of-book marker.
  novel = text.drop_while { |line|
    !line.start_with?('One morning, when Gregor Samsa')
  }.take_while { |line|
    !line.start_with?('End of the Project Gutenberg EBook')
  }

  # The option key is :ngram (singular); each word is conditioned on the
  # single word that precedes it.
  yada = Yada.new(ngram: 1)
  yada.train!(novel)

  # Print five generated sentences.
  puts yada.generate(5).join
end
@@ -0,0 +1,18 @@
1
$LOAD_PATH << File.expand_path('../../lib', __FILE__)
require 'yada'
require 'open-uri'

# Character-level example: download The Metamorphosis by Franz Kafka, train a
# Markov model whose tokens are single characters, and print generated text.
URI.parse('http://www.gutenberg.org/cache/epub/5200/pg5200.txt').open do |book|
  # Skip everything before the first sentence of the novel...
  from_opening = book.drop_while do |line|
    !line.start_with?('One morning, when Gregor Samsa')
  end

  # ...and cut the text off at the Project Gutenberg end-of-book marker.
  novel = from_opening.take_while do |line|
    !line.start_with?('End of the Project Gutenberg EBook')
  end

  # Every character is a token (tokenize: /./), tokens are glued back together
  # without a separator (join: ''), and each one depends on the previous four.
  generator = Yada.new(ngram: 4, tokenize: /./, join: '')
  generator.train!(novel)

  sentences = generator.generate(50)
  puts sentences.join
end
@@ -0,0 +1,49 @@
1
+ require "yada/version"
2
+ require "yada/input"
3
+ require "yada/markov"
4
+
5
# Generator of nonsensical text backed by a Markov chain model.
#
# Train an instance on some text with #train!, then ask it for new sentences
# with #generate.
class Yada
  # Options used unless overridden in the hash given to #initialize:
  #   :ngram       - number of START markers that seed a sentence; passed to
  #                  the Markov model as its n-gram size
  #   :tokenize    - Regexp passed to the Markov model to split text in tokens
  #   :join        - String inserted before every generated token that does
  #                  not match :punctuation
  #   :punctuation - Regexp; matching tokens are appended without :join
  #   :stop        - Regexp passed to Input to mark the end of a sentence
  DEFAULT_OPTIONS = {
    :ngram => 1,
    :tokenize => /[\w\-\/]+| ?[^\s]+/,
    :join => ' ',
    :punctuation => /[^\w\-\/]+/,
    :stop => /[\.\?!;]\s+/
  }

  # options - Hash of overrides merged on top of DEFAULT_OPTIONS.
  def initialize(options = {})
    @options = DEFAULT_OPTIONS.merge(options)
    @markov = Markov.new(@options[:ngram], @options[:tokenize], @options[:join])
  end

  # Train the model on `data`, which is wrapped in an Input together with the
  # :stop option before being handed to the Markov model.
  #
  # Returns true.
  def train!(data)
    input = Input.new(data, @options[:stop])
    @markov.train!(input)
    true
  end

  # Generate `n` sentences.
  #
  # Returns an Array of `n` Strings. Each sentence is built by concatenating
  # the generated tokens, prefixing every token that does not match
  # :punctuation with the :join string.
  def generate(n = 1)
    start = [Markov::START] * @options[:ngram]
    (1..n).map do
      generate_tokens(start).reduce('') do |sentence, token|
        if token.match(@options[:punctuation])
          sentence + token
        else
          sentence + @options[:join] + token
        end
      end
    end
  end

  # Walk the chain starting from `ngram`, collecting tokens until STOP is
  # drawn. Iterative rather than recursive, so that long sentences cannot
  # exhaust the call stack.
  #
  # Generation also ends when no next token can be drawn (for instance on an
  # untrained model or an n-gram that was never observed) instead of looping
  # forever.
  #
  # Returns the Array of generated tokens (STOP excluded).
  private def generate_tokens(ngram, tokens = [])
    loop do
      next_token = pick_next_token(ngram)
      return tokens if next_token.nil? || next_token == Markov::STOP

      # Slide the window while keeping its size constant; for a window of one
      # or more tokens this is the same as dropping the oldest one.
      ngram = (ngram + [next_token]).last(ngram.size)
      tokens += [next_token]
    end
  end

  # Draw the token following `ngram` by walking the cumulative transition
  # probabilities until they exceed a uniform random number.
  #
  # Returns the chosen token, or nil when the probabilities never exceed the
  # random threshold.
  private def pick_next_token(ngram)
    random = rand
    cumulative = 0.0
    @markov.tokens.find do |token|
      cumulative += @markov.transition_probability(ngram, token)
      cumulative > random
    end
  end
end
@@ -0,0 +1,25 @@
1
class Yada
  # Enumerable that regroups a chunked text source (typically the lines of a
  # file) into sentences, where a sentence ends at a match of the `stop`
  # Regexp. Sentences may span several chunks.
  class Input
    include Enumerable

    # source - object yielding Strings from #each (a File, an Array of
    #          lines, ...), or a String, which is read through #each_line.
    # stop   - Regexp marking the end of a sentence; the matched text is kept
    #          at the end of the sentence.
    def initialize(source, stop = /\.\s+/)
      @source, @stop = source, stop
    end

    # Yield every sentence found in the source, in order. Text left over
    # after the last stop match is yielded as a final sentence when it is not
    # empty.
    #
    # Returns nil, or an Enumerator when called without a block.
    def each(&block)
      return enum_for(:each) unless block_given?

      accumulator = ''
      each_chunk do |line|
        accumulator = regroup(accumulator, line, &block)
      end
      yield accumulator if accumulator != ''
      nil
    end

    # Iterate over the chunks of the source. Sources without #each (such as a
    # plain String) are read with #each_line instead.
    private def each_chunk(&block)
      iterator = @source.respond_to?(:each) ? :each : :each_line
      @source.public_send(iterator, &block)
    end

    # Split `text` at every stop match, yielding each completed sentence. The
    # first sentence is prefixed with `accumulator`, the unfinished text
    # carried over from previous chunks.
    #
    # Implemented as a loop so that a chunk containing a very large number of
    # sentences cannot exhaust the call stack.
    #
    # Returns the trailing text that is not yet terminated by a stop match.
    private def regroup(accumulator, text, &block)
      loop do
        pre, stop, rest = text.partition(@stop)
        return accumulator + pre if stop == ''
        yield accumulator + pre + stop
        accumulator, text = '', rest
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require 'set'
2
+
3
class Yada
  # N-gram Markov model: counts how often each token follows each sequence of
  # `n` preceding tokens, and turns those counts into transition
  # probabilities.
  class Markov
    # Marker tokens padding the beginning and the end of every trained text.
    START = :__start
    STOP = :__stop

    # Set of every token seen in "next token" position during training
    # (includes STOP once the model has been trained).
    attr_reader :tokens

    # n        - number of preceding tokens that form the context
    # tokenize - Regexp given to String#scan to split a text into tokens
    # join     - accepted for interface compatibility; contexts are keyed by
    #            their token sequence, so no join string is needed to build
    #            the keys
    def initialize(n = 1, tokenize = /[\w\-\/]+|[^\s]+/, join = ' ')
      @n, @tokenize, @join = n, tokenize, join
      @transition_count = Hash.new(0)
      @ngram_count = Hash.new(0)
      @tokens = Set.new
    end

    # Update the counts with every text yielded by `data.each`. Each text is
    # tokenized, padded with `n` START markers and one STOP marker, and every
    # window of `n + 1` consecutive tokens is recorded as "context -> token".
    #
    # Returns whatever `data.each` returns.
    def train!(data)
      prefix = [START] * @n
      suffix = [STOP]

      data.each do |text|
        Join.new(prefix, text.scan(@tokenize), suffix).each_cons(@n + 1) do |*ngram, token|
          # The context array itself is the key: distinct token sequences can
          # never collide, which a joined string cannot guarantee (e.g. the
          # START marker versus a literal "__start" token).
          @tokens.add(token)
          @transition_count[[ngram, token]] += 1
          @ngram_count[ngram] += 1
        end
      end
    end

    # Probability of `token` following the context `ngram` (an Array of
    # tokens).
    #
    # Returns 0 when the context was never observed, otherwise a Rational.
    def transition_probability(ngram, token)
      context = ngram.to_a
      observed = @ngram_count[context]
      return 0 if observed == 0
      Rational(@transition_count[[context, token]], observed)
    end

    # Enumerable presenting several enumerables as one continuous sequence,
    # without building an intermediate concatenated collection.
    class Join
      include Enumerable

      def initialize(*enumerables)
        @enumerables = enumerables
      end

      # Yield every element of every wrapped enumerable, in order.
      #
      # Returns nil, or an Enumerator when called without a block.
      def each
        return enum_for(:each) unless block_given?

        @enumerables.each do |enumerable|
          enumerable.each do |element|
            yield element
          end
        end
        nil
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ class Yada
2
+ VERSION = "0.1.0"
3
+ end
@@ -0,0 +1,33 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'yada/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "yada"
8
+ spec.version = Yada::VERSION
9
+ spec.authors = ["Luca Ongaro"]
10
+ spec.email = ["lukeongaro@gmail.com"]
11
+
12
+ spec.summary = 'Nonsensical text generator'
13
+ spec.description = 'Train a Markov Chain model on textual content and use it to generate nonsensical text in that style'
14
+ spec.homepage = 'https://github.com/lucaong/yada'
15
+ spec.license = "MIT"
16
+
17
+ if spec.respond_to?(:metadata)
18
+ spec.metadata['allowed_push_host'] = "https://rubygems.org"
19
+ else
20
+ raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
21
+ end
22
+
23
+ spec.files = `git ls-files -z`.split("\x0").reject do |f|
24
+ f.match(%r{^(test|spec|features)/})
25
+ end
26
+ spec.bindir = "exe"
27
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
+ spec.require_paths = ["lib"]
29
+
30
+ spec.add_development_dependency "bundler", "~> 1.13"
31
+ spec.add_development_dependency "rake", "~> 10.0"
32
+ spec.add_development_dependency "rspec", "~> 3.0"
33
+ end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yada
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Luca Ongaro
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-11-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.13'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.13'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description: Train a Markov Chain model on textual content and use it to generate
56
+ nonsensical text in that style
57
+ email:
58
+ - lukeongaro@gmail.com
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - ".gitignore"
64
+ - ".rspec"
65
+ - ".travis.yml"
66
+ - Gemfile
67
+ - LICENSE.txt
68
+ - README.md
69
+ - Rakefile
70
+ - bin/console
71
+ - bin/setup
72
+ - examples/kafka.rb
73
+ - examples/kafka_by_character.rb
74
+ - lib/yada.rb
75
+ - lib/yada/input.rb
76
+ - lib/yada/markov.rb
77
+ - lib/yada/version.rb
78
+ - yada.gemspec
79
+ homepage: https://github.com/lucaong/yada
80
+ licenses:
81
+ - MIT
82
+ metadata:
83
+ allowed_push_host: https://rubygems.org
84
+ post_install_message:
85
+ rdoc_options: []
86
+ require_paths:
87
+ - lib
88
+ required_ruby_version: !ruby/object:Gem::Requirement
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ requirements: []
99
+ rubyforge_project:
100
+ rubygems_version: 2.5.1
101
+ signing_key:
102
+ specification_version: 4
103
+ summary: Nonsensical text generator
104
+ test_files: []