ruby_markovify 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 418c85d51a3ff770f15683624daaaf228c4c9e72
4
+ data.tar.gz: 3abddddeedcf70339a40def10b0ce1f876b6492c
5
+ SHA512:
6
+ metadata.gz: 65ffb392f740d449c9e45cf4497d3d631aa8d826cd73cb04c13a4f86140e9c28b0cd0f0e5f5886822030a0eb5f55e75b5516f97f36f1875c51926a6707959f52
7
+ data.tar.gz: ecd84fdc50c2c27993f3e4de937f21c9638e3afb578fc248d97a60ffd2b73f27d72c3dc1833df9d829f90292e1519745a6bbccf3202d35c79d42125b174b7404
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in ruby_markovify.gemspec
4
+ gemspec
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2016 meew0
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,25 @@
1
+ The following text is the license for markovify by Jeremy Singer-Vine, which
2
+ ruby_markovify is based on and uses code from.
3
+ ===================
4
+
5
+ The MIT License (MIT)
6
+
7
+ Copyright (c) 2015, Jeremy Singer-Vine
8
+
9
+ Permission is hereby granted, free of charge, to any person obtaining a copy
10
+ of this software and associated documentation files (the "Software"), to deal
11
+ in the Software without restriction, including without limitation the rights
12
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13
+ copies of the Software, and to permit persons to whom the Software is
14
+ furnished to do so, subject to the following conditions:
15
+
16
+ The above copyright notice and this permission notice shall be included in all
17
+ copies or substantial portions of the Software.
18
+
19
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
25
+ SOFTWARE.
@@ -0,0 +1,57 @@
1
+ # RubyMarkovify
2
+
3
+ A Ruby port of the excellent [markovify](https://github.com/jsvine/markovify) Python library.
4
+ ## Installation
5
+
6
+ Add this line to your application's Gemfile:
7
+
8
+ ```ruby
9
+ gem 'ruby_markovify'
10
+ ```
11
+
12
+ And then execute:
13
+
14
+ $ bundle
15
+
16
+ Or install it yourself as:
17
+
18
+ $ gem install ruby_markovify
19
+
20
+ ## Usage
21
+
22
+ The ruby_markovify method names are identical to the ones in markovify, so if you need specific methods you can just read the markovify docs.
23
+
24
+ Here's a basic usage example:
25
+ ```rb
26
+ require 'ruby_markovify'
27
+
28
+ # Read the corpus from a file
29
+ corpus = File.read('corpus.txt')
30
+
31
+ # Make a text from the corpus with state size 3
32
+ text = RubyMarkovify::Text.new(corpus, 3)
33
+
34
+ puts text.make_sentence # Generates a random sentence
35
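+ puts text.make_sentence_with_start('I have a') # Generates a random sentence starting with 'I have a' (the start must have exactly as many words as the state size, here 3, and must occur in the corpus)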
36
+ puts text.make_short_sentence(40) # Generates a random sentence shorter than 40 characters
37
+ ```
38
+
39
+ In addition to the default markovify `Text` and `NewlineText` classes, ruby_markovify also includes an `ArrayText` class that uses an array (already split into sentences) as its corpus.
40
+
41
+ Note that ruby_markovify currently implements none of the JSON methods (`Chain#to_json`, `Chain.from_json` and `Text.from_chain`), so if you need them you'll have to implement them yourself (feel free to make a PR!).
42
+
43
+ ## Development
44
+
45
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
46
+
47
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
48
+
49
+ ## Contributing
50
+
51
+ Bug reports and pull requests are welcome on GitHub at https://github.com/meew0/ruby_markovify.
52
+
53
+
54
+ ## License
55
+
56
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
57
+
@@ -0,0 +1,2 @@
1
+ require "bundler/gem_tasks"
2
+ task :default => :spec
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "ruby_markovify"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,4 @@
1
+ require 'ruby_markovify/version'
2
+ require 'ruby_markovify/text'
3
+
4
+ module RubyMarkovify; end
@@ -0,0 +1,57 @@
1
module RubyMarkovify
  # Returns the running totals of +array+.
  #
  # @param array [Array<Numeric>] values to accumulate
  # @return [Array<Numeric>] e.g. [1, 2, 3] => [1, 3, 6]
  def self.cumulative_sum(array)
    sum = 0
    array.map { |x| sum += x }
  end

  # A Markov chain over arbitrary items. States are arrays of +state_size+
  # consecutive items; runs are padded with :begin markers at the start and
  # terminated by a single :end marker.
  class Chain
    # @param corpus [Array<Array>] list of runs (each run is a list of items);
    #   only used when no +model+ is given
    # @param state_size [Integer] number of items that make up one state
    # @param model [Hash, nil] a prebuilt model of the form
    #   { state => { follower => count } }; built from +corpus+ when nil
    def initialize(corpus, state_size, model = nil)
      @state_size = state_size
      @model = model || build(corpus, @state_size)
    end

    # Builds the transition table for +corpus+.
    #
    # @param corpus [Array<Array>] list of runs
    # @param state_size [Integer] number of items per state
    # @return [Hash] { state => { follower => count } }
    # @raise [ArgumentError] if +corpus+ is not an Array whose first element is an Array
    def build(corpus, state_size)
      fail ArgumentError, '`corpus` must be an Array of Arrays!' unless corpus.is_a?(Array) && corpus[0].is_a?(Array)

      model = {}

      corpus.each do |run|
        items = [:begin] * state_size + run + [:end]

        # One transition per item of the run plus the final transition into
        # :end, i.e. run.length + 1 transitions. (`upto` is inclusive, so the
        # upper bound is run.length; going one further would record a bogus
        # transition from a state containing :end to nil.)
        0.upto(run.length) do |i|
          state = items[i, state_size]
          follow = items[i + state_size]

          followers = (model[state] ||= {})
          followers[follow] = followers.fetch(follow, 0) + 1
        end
      end

      model
    end

    # Picks a random follower of +state+, weighted by how often it was seen.
    #
    # @param state [Array] a state of +state_size+ items
    # @return [Object] the chosen follower (possibly :end)
    # @raise [ArgumentError] if +state+ does not occur in the model
    def move(state)
      followers = @model[state]
      fail ArgumentError, "state #{state.inspect} does not occur in the model" if followers.nil?

      choices = followers.keys
      cumdist = RubyMarkovify.cumulative_sum(followers.values)
      # Draw a point in [0, total) and take the first bucket that covers it.
      r = rand * cumdist[-1]
      choices[cumdist.index { |e| e >= r }]
    end

    # Walks the chain from +init_state+ until :end is drawn.
    #
    # @param init_state [Array, nil] state to start from; defaults to
    #   +state_size+ :begin markers
    # @return [Array] the generated items (the items of +init_state+ are not included)
    def gen(init_state = nil)
      state = init_state || [:begin] * @state_size
      result = []
      loop do
        next_word = move(state)
        break if next_word == :end
        result << next_word
        state = state[1..-1] + [next_word]
      end
      result
    end

    # As Ruby doesn't have the concept of generators, #gen returns an array itself,
    # so we don't need to make a separate method for walk
    alias_method :walk, :gen
  end
end
@@ -0,0 +1,71 @@
1
module RubyMarkovify
  # Heuristic sentence splitting. Mixed into Text; every method is an
  # instance method of the including object.
  module Splitters
    ASCII_LOWERCASE = 'abcdefghijklmnopqrstuvwxyz'
    ASCII_UPPERCASE = ASCII_LOWERCASE.upcase

    # States w/ thanks to https://github.com/unitedstates/python-us
    # Titles w/ thanks to https://github.com/nytimes/emphasis and @donohoe
    STATES = %w{
      ala ariz ark calif colo conn del fla ga ill ind kan ky la md mass mich minn miss mo mont neb nev okla
      ore pa tenn vt va wash wis wyo
    }
    UNITED_STATES = %w{u.s}
    TITLES = %w{mr ms mrs msr dr gov pres sen sens rep reps prof gen messrs col sr jf sgt mgr fr rev jr snr atty supt}
    STREETS = %w{ave blvd st rd hwy}
    MONTHS = %w{jan feb mar apr jun jul aug sep sept oct nov dec}
    INITIALS = ASCII_LOWERCASE.chars

    # Abbreviations that are recognised when written with a leading capital.
    ABBR_CAPPED = STATES + UNITED_STATES + TITLES + STREETS + MONTHS + INITIALS
    # Abbreviations that are recognised when written in lowercase.
    ABBR_LOWERCASE = %w{etc v vs viz al pct}
    # Dotted words that never end a sentence.
    EXCEPTIONS = %w{U.S. U.N. E.U. F.B.I. C.I.A.}

    PUNCTUATION = %w{? !}

    # Tells whether +dotted_word+ (a word including its trailing dot) is a
    # known abbreviation.
    #
    # @param dotted_word [String] e.g. "Mr." or "etc."
    # @return [Boolean]
    def is_abbreviation(dotted_word)
      clipped = dotted_word[0..-2] # drop the trailing dot
      if ASCII_UPPERCASE.include? clipped[0]
        ABBR_CAPPED.include? clipped.downcase
      else
        ABBR_LOWERCASE.include? clipped
      end
    end

    # Tells whether +word+ (including its trailing punctuation) ends a sentence.
    #
    # @param word [String]
    # @return [Boolean]
    def is_sentence_ender(word)
      return false if EXCEPTIONS.include? word
      return true if PUNCTUATION.include? word[-1]
      # More than one capital letter (e.g. "NASA."): treat as a sentence end.
      # All capitals must be counted; removing only the first non-capital
      # character would make almost every word pass this check.
      return true if word.count('A-Z') > 1
      return true if word[-1] == '.' && !is_abbreviation(word)
      false
    end

    # A word that ends with punctuation
    # Followed by optional quote/parens/etc
    # Followed by whitespace + non-(lowercase or dash)
    END_PATTERN = /([\w\.'’&\]\)]+[\.\?!])([‘’“”'\"\)\]]*)(\s+(?![a-z\-–—]))/

    # Splits +text+ into sentences.
    #
    # @param text [String]
    # @return [Array<String>] the stripped, non-empty sentences in order,
    #   including the text that follows the last detected sentence end
    def split_into_sentences(text)
      end_indices = []
      text.scan(END_PATTERN) do
        match = Regexp.last_match
        next unless is_sentence_ender(match[1])
        # A sentence ends after the word and any closing quotes/brackets,
        # i.e. at the end of the second capture group.
        end_indices << match.end(2)
      end

      starts = [0] + end_indices
      stops = end_indices + [text.length]

      starts.zip(stops)
            .map { |from, to| text[from...to].strip }
            .reject(&:empty?)
    end
  end
end
@@ -0,0 +1,98 @@
1
+ require 'ruby_markovify/chain'
2
+ require 'ruby_markovify/splitters'
3
+ require 'unidecode'
4
+
5
module RubyMarkovify
  # Builds a Markov chain from a body of text and generates random sentences
  # from it. Subclasses override #sentence_split to accept other input shapes.
  class Text
    include RubyMarkovify::Splitters

    WORD_SPLIT_PATTERN = /\s+/
    REJECT_PATTERN = /(^')|('$)|\s'|'\s|["(\(\)\[\])]/

    DEFAULT_MAX_OVERLAP_RATIO = 0.7
    DEFAULT_MAX_OVERLAP_TOTAL = 15
    DEFAULT_TRIES = 10

    # @param input_text [Object] the corpus, in whatever form #sentence_split
    #   accepts (a String for Text and NewlineText, an Array for ArrayText)
    # @param state_size [Integer, nil] words per chain state; defaults to 2
    # @param chain [Chain, nil] a prebuilt chain; built from the corpus when nil
    def initialize(input_text, state_size = nil, chain = nil)
      runs = generate_corpus(input_text)
      # Kept so generated sentences can be checked against the source text.
      @rejoined_text = sentence_join(runs.map { |e| word_join(e) })
      state_size ||= 2
      @chain = chain || Chain.new(runs, state_size)
    end

    # Splits the input into sentences.
    def sentence_split(text)
      split_into_sentences(text)
    end

    # Joins sentences back into one text.
    def sentence_join(sentences)
      sentences.join ' '
    end

    # Splits a sentence into words on whitespace.
    def word_split(sentence)
      sentence.split(WORD_SPLIT_PATTERN)
    end

    # Joins words back into one sentence.
    def word_join(words)
      words.join ' '
    end

    # Tells whether +sentence+ must be left out of the corpus.
    #
    # @param sentence [String]
    # @return [Boolean] true when the ASCII transliteration of the sentence
    #   matches REJECT_PATTERN (quotes, parentheses, brackets), i.e. true means
    #   "reject"
    def test_sentence_input(sentence)
      !!(sentence.to_ascii =~ REJECT_PATTERN)
    end

    # Turns the input into a list of runs (one array of words per sentence).
    #
    # @return [Array<Array<String>>]
    def generate_corpus(text)
      # Non-destructive on purpose: ArrayText#sentence_split hands back the
      # caller's own array, which must not be modified.
      sentences = sentence_split(text).reject { |e| test_sentence_input(e) }
      sentences.map { |e| word_split(e) }
    end

    # Checks that a generated sentence does not copy too much of the source.
    #
    # The allowed overlap is the smaller of +max_overlap_total+ and
    # +max_overlap_ratio+ times the sentence length (rounded). The sentence is
    # rejected when any run of one more word than that appears verbatim in
    # the source text.
    #
    # @param words [Array<String>]
    # @param max_overlap_ratio [Float]
    # @param max_overlap_total [Integer]
    # @return [Boolean] true when the sentence is acceptable
    def test_sentence_output(words, max_overlap_ratio, max_overlap_total)
      overlap_ratio = (max_overlap_ratio * words.length).round
      overlap_max = [max_overlap_total, overlap_ratio].min
      overlap_over = overlap_max + 1
      gram_count = [words.length - overlap_max, 1].max

      # Exactly gram_count windows of exactly overlap_over words each.
      (0...gram_count).each do |i|
        gram_joined = word_join(words[i, overlap_over])
        return false if @rejoined_text.include? gram_joined
      end

      true
    end

    # Generates a random sentence.
    #
    # @param init_state [Array, nil] state to start the chain from; its words
    #   (minus any :begin markers) are put at the front of the sentence
    # @param options [Hash] :tries, :max_overlap_ratio, :max_overlap_total
    # @return [String, nil] nil if no acceptable sentence was found within
    #   the given number of tries
    def make_sentence(init_state = nil, options = {})
      tries = options[:tries] || DEFAULT_TRIES
      mor = options[:max_overlap_ratio] || DEFAULT_MAX_OVERLAP_RATIO
      mot = options[:max_overlap_total] || DEFAULT_MAX_OVERLAP_TOTAL

      # The chain only returns what follows the initial state, so the start
      # words have to be prepended to get the whole sentence.
      prefix = init_state ? init_state.reject { |word| word == :begin } : []

      tries.times do
        words = prefix + @chain.walk(init_state)
        return word_join(words) if test_sentence_output(words, mor, mot)
      end
      nil
    end

    # Generates a random sentence shorter than +char_limit+ characters.
    #
    # NOTE: keeps retrying until such a sentence is produced, so it does not
    # return if the corpus cannot yield one.
    #
    # @param char_limit [Integer] exclusive upper bound on the sentence length
    # @param options [Hash] passed on to #make_sentence
    # @return [String]
    def make_short_sentence(char_limit, options = {})
      loop do
        sentence = make_sentence(nil, options)
        return sentence if sentence && sentence.length < char_limit
      end
    end

    # Generates a random sentence from the given beginning.
    #
    # @param beginning [String] start of the sentence; it is split into words
    #   and used as the initial chain state
    # @param options [Hash] passed on to #make_sentence
    # @return [String, nil]
    def make_sentence_with_start(beginning, options = {})
      make_sentence(word_split(beginning), options)
    end
  end

  # A Text whose corpus has one sentence per line.
  class NewlineText < Text
    def sentence_split(text)
      text.split(/\s*\n\s*/)
    end
  end

  # A Text whose corpus is already an array of sentences.
  class ArrayText < Text
    def sentence_split(text)
      text
    end
  end
end
@@ -0,0 +1,3 @@
1
module RubyMarkovify
  # Version of the gem; read by the gemspec. Frozen so it cannot be modified
  # in place at runtime.
  VERSION = '0.1.0'.freeze
end
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'ruby_markovify/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'ruby_markovify'
8
+ spec.version = RubyMarkovify::VERSION
9
+ spec.authors = ['meew0']
10
+ spec.email = ['blactbt@live.de']
11
+
12
+ spec.summary = %q{A Ruby port of the excellent `markovify` Python module.}
13
+ spec.homepage = 'https://github.com/meew0/ruby_markovify'
14
+ spec.license = 'MIT'
15
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
16
+ spec.bindir = 'exe'
17
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
18
+ spec.require_paths = ['lib']
19
+
20
+ spec.add_dependency 'unidecode'
21
+
22
+ spec.add_development_dependency 'bundler', '~> 1.12.a'
23
+ spec.add_development_dependency 'rake', '~> 10.0'
24
+ end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby_markovify
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - meew0
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2016-02-21 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: unidecode
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: bundler
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 1.12.a
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 1.12.a
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description:
56
+ email:
57
+ - blactbt@live.de
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - Gemfile
64
+ - LICENSE.txt
65
+ - MARKOVIFY_LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - bin/console
69
+ - bin/setup
70
+ - lib/ruby_markovify.rb
71
+ - lib/ruby_markovify/chain.rb
72
+ - lib/ruby_markovify/splitters.rb
73
+ - lib/ruby_markovify/text.rb
74
+ - lib/ruby_markovify/version.rb
75
+ - ruby_markovify.gemspec
76
+ homepage: https://github.com/meew0/ruby_markovify
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.5.1
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: A Ruby port of the excellent `markovify` Python module.
100
+ test_files: []