markov_words 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -2
- data/Gemfile.lock +1 -1
- data/Rakefile +6 -6
- data/bin/console +5 -5
- data/lib/markov_words/file_store.rb +39 -0
- data/lib/markov_words/version.rb +1 -1
- data/lib/markov_words/words.rb +213 -0
- data/lib/markov_words.rb +3 -223
- data/markov_words.gemspec +16 -17
- metadata +14 -12
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5dda90c58c74410faf33ed06402150326be2afe8
|
4
|
+
data.tar.gz: 77cfaf41ef3314807e06b2ef12f5aa7bc42cbe4a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 14d952d8db7fe1f162bccf3c6ab9f1fcc389ff5cf91000f5f0bb87761860592148f4cd2bdc1fdb82e83ba9037316181f8c6987e10f62dc52c248037f5a902b7c
|
7
|
+
data.tar.gz: 7ab05abcd9938125c56c76f82eec3eab0d46cc3588a4b120f9736830a19a334c988eac762a09360523128889654b76dad03002caf4c8be6ea4017d8ce6b9846f
|
data/Gemfile
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
source
|
1
|
+
source 'https://rubygems.org'
|
2
2
|
|
3
|
-
git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
|
3
|
+
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
4
4
|
|
5
5
|
# Specify your gem's dependencies in markov_words.gemspec
|
6
6
|
gemspec
|
data/Gemfile.lock
CHANGED
data/Rakefile
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
3
3
|
|
4
4
|
Rake::TestTask.new(:test) do |t|
|
5
|
-
t.libs <<
|
6
|
-
t.libs <<
|
7
|
-
t.test_files = FileList[
|
5
|
+
t.libs << 'test'
|
6
|
+
t.libs << 'lib'
|
7
|
+
t.test_files = FileList['test/**/*_test.rb']
|
8
8
|
end
|
9
9
|
|
10
|
-
task :
|
10
|
+
task default: :test
|
data/bin/console
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'markov_words'
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
8
8
|
|
9
9
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
require
|
10
|
+
require 'pry'
|
11
11
|
Pry.start
|
12
12
|
|
13
|
-
#require
|
14
|
-
#IRB.start(__FILE__)
|
13
|
+
# require 'irb'
|
14
|
+
# IRB.start(__FILE__)
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'securerandom'
|
2
|
+
|
3
|
+
module MarkovWords
  # Utility for persisting arbitrary data to disk as Marshal'ed Ruby objects.
  #
  # NOTE(review): `Marshal.load` can instantiate arbitrary objects. Only point
  # a FileStore at files written by this library, never at untrusted input.
  class FileStore
    # Path of the backing file on disk.
    attr_reader :file_path
    # Kept for interface compatibility; nothing in this class assigns it, so
    # it reads as nil.
    attr_reader :data

    # @param opts [Hash] configuration for the store.
    # @option opts [String] :file_path Path and name for where the file should
    #   be stored. Defaults to a randomly-named file directly under /tmp.
    # @option opts [Boolean] :flush_data Do you want the file to be cleared on
    #   open?
    def initialize(opts)
      # urlsafe_base64 never emits "/" or "+". Plain base64 can, and a "/"
      # would turn the default into a nested path under a directory that does
      # not exist, so every write would fail.
      @file_path = opts.fetch(:file_path) do
        "/tmp/#{SecureRandom.urlsafe_base64}"
      end
      delete_if_exists(@file_path) if opts[:flush_data]
    end

    # Store arbitrary data into file storage, replacing any previous content.
    # @param data [Object] Any Marshal-able object
    def store_data(data)
      File.open(@file_path, 'wb') { |f| Marshal.dump(data, f) }
    end

    # Retrieve whatever data is stored in the file + return it.
    # @return [Object, nil] the unmarshalled object, or nil when the file does
    #   not exist.
    def retrieve_data
      return nil unless File.exist?(@file_path)

      # Binary mode mirrors the 'wb' used when writing, so the Marshal stream
      # is never subjected to newline/encoding translation.
      File.open(@file_path, 'rb') { |f| Marshal.load(f) }
    end

    private

    # Remove the file at `path`, doing nothing when it is absent.
    def delete_if_exists(path)
      File.delete path if File.exist? path
    end
  end
end
|
data/lib/markov_words/version.rb
CHANGED
@@ -0,0 +1,213 @@
|
|
1
|
+
module MarkovWords
  # This class takes care of word generation, caching, and data storage.
  class Words
    # Perform caching? Defaults to true.
    attr_reader :cache
    # File location where you want to store the cache
    attr_reader :cache_file
    # How many words you want to store in the cache?
    attr_reader :cache_size
    # Object for storing + retrieving cache data from persistent storage
    attr_reader :cache_store
    # Your dictionary of words. Defaults to /usr/share/dict/words.
    attr_reader :corpus_file
    # Where should your database be stored on disk?
    attr_reader :data_file
    # Object for storing + retrieving n-gram data from persistent storage
    attr_reader :data_store
    # The database of "grams" (word/count combinations), stored on the disk and
    # loaded into this variable in memory when generating words.
    attr_reader :grams
    # Number of n-grams to compute for your database. Defaults to 2
    attr_reader :gram_size
    # Max generated word length. Defaults to 16. Lengths are drawn with
    # `SecureRandom.rand(max_length)`, so the drawn length is always strictly
    # below this value and `min_length` must be smaller than `max_length`.
    attr_reader :max_length
    # Minimum generated word length. Defaults to 3. NOTE: If your corpus size
    # is very small (<1000 words or so), it's hard to guarantee a min_length
    # because so many n-grams will have no association, which terminates word
    # generation.
    attr_reader :min_length

    # Create a new "Words" object
    # @param opts [Hash] options sent to the object. Any of the object
    #   attributes (eg `:cache_file` or `:gram_size`) are valid parameters to
    #   add to the `opts` hash.
    # @return [Words] A `MarkovWords::Words` object.
    def initialize(opts = {})
      @grams = nil
      @gram_size = opts.fetch :gram_size, 2
      @max_length = opts.fetch :max_length, 16
      @min_length = opts.fetch :min_length, 3

      initialize_cache(opts)
      initialize_data(opts)
    end

    # "Top off" the cache of stored words, and ensure that it's at
    # `@cache_size`. If `@cache` is set to `false`, returns an empty array.
    # @return [Array<String>] All words in the cache.
    def refresh_cache
      return [] unless @cache

      # The store yields nil when no cache file has been written yet, so start
      # from an empty list in that case instead of calling #length on nil.
      words_array = @cache_store.retrieve_data || []
      words_array << generate_word while words_array.length < @cache_size
      @cache_store.store_data words_array
      words_array
    end

    # Generate a new word, or return one from the cache if available.
    # @return [String] The word.
    def word
      @cache ? load_word_from_cache : generate_word
    end

    private

    # Read cache-related options and build the cache's backing store.
    def initialize_cache(opts)
      @cache = opts.fetch :cache, true
      @cache_file = opts.fetch :cache_file,
                               "tmp/markov_words_#{@gram_size}.cache"
      @cache_size = opts.fetch :cache_size, 70
      @cache_store = FileStore.new(file_path: @cache_file)
    end

    # Read corpus/database options and build the n-gram backing store.
    def initialize_data(opts)
      @corpus_file = opts.fetch :corpus_file,
                                '/usr/share/dict/words'
      @data_file = opts.fetch :data_file,
                              "tmp/markov_words_#{@gram_size}.data"
      @data_store = FileStore.new(file_path: @data_file)
    end

    # True when `ary` has fewer than two elements, or when one of its first
    # two elements contains a lowercase vowel.
    def contains_vowel?(ary)
      ary.length < 2 || ary.take(2).join.match?(/[aeiou]/)
    end

    # Generates an English (by default) -sounding word.
    def generate_word
      set_grams if @grams.nil?
      generate_gram_array(generate_word_length).join
    end

    # Grow a seed gram one character at a time until the joined result reaches
    # `desired_length`, or until the current key has no recorded successor.
    # @param desired_length [Integer] target length of the joined word.
    # @return [Array<String>] the pieces of the word, to be joined.
    def generate_gram_array(desired_length)
      gram_array = generate_initial_gram_array

      # `<` rather than `!=`: a seed longer than desired_length must stop the
      # loop rather than keep growing until the chain happens to dead-end.
      while gram_array.join.length < desired_length
        # grab last @gram_size (or possibly fewer if the array is too small)
        # elements from the current gram_array, to use as the next key.
        key = gram_array.last(@gram_size).join

        gram = pick_random_char(@grams[key])
        break if gram.nil?

        gram_array << gram
      end

      gram_array
    end

    # Set initial array of chars, which is taken from the @grams key list.
    # must contain a vowel in the first 2 chars (unless @gram_size == 1 in
    # which case any letter).
    # @raise [ArgumentError] when the n-gram database has no entries.
    def generate_initial_gram_array
      if @grams.empty?
        raise ArgumentError,
              'n-gram database is empty; check the corpus file'
      end

      keys = @grams.keys
      gram_min_length = [@gram_size, @min_length].min

      # Rejection-sample keys until one is long enough and has an early vowel.
      seed = []
      until seed.length >= gram_min_length && contains_vowel?(seed)
        seed = keys.sample.chars
      end
      seed
    end

    # The word must be a random length, between @min and @max. Draws are
    # repeated until one is at least @min_length.
    def generate_word_length
      word_length = 0
      until word_length >= @min_length
        word_length = SecureRandom.rand(@max_length)
      end
      word_length
    end

    # Pop one word off the persisted cache, regenerating a full cache first
    # when it is missing or exhausted, and write the remainder back.
    def load_word_from_cache
      words_array = @cache_store.retrieve_data
      if words_array.nil? || words_array.empty?
        words_array = Array.new(@cache_size) { generate_word }
      end

      word = words_array.pop
      cache_store.store_data words_array

      word
    end

    # Generate a MarkovWords corpus from a datafile, with a given size of
    # n-gram. Returns a hash of "grams", which are a map of a letter to the
    # frequency of the letters that follow it, eg: {"c" => {"a" => 1, "b" =>
    # 2}}
    def markov_corpus(file, gram_size)
      grams = {}

      # Corpus contains a list of words, separated by newlines
      File.foreach(file) do |word|
        word = word.downcase.delete('-')
        gram_size.downto(1) do |current_gram_size|
          markov_update_count! grams, word, current_gram_size
        end
      end

      grams
    end

    # Given a database of `grams`, a `word`, and a `gram_size`, record every
    # (prefix of `gram_size` chars -> following char) pair found in `word`,
    # incrementing the pair's count in `grams`. Pairs whose following char is
    # a line ending are skipped.
    def markov_update_count!(grams, word, gram_size)
      word.chars.each_cons(gram_size + 1) do |gram|
        l = gram.first(gram_size).join
        r = gram.last
        next if l.empty? || r.empty? || line_ending?(r)

        counts = (grams[l] ||= {})
        counts[r] = counts.fetch(r, 0) + 1
      end
    end

    # Given a hash in the format: {"a" => 1, "b" => 2}, grab a random key,
    # accurate to the distribution of counts. In the example hash above, "a"
    # would have a 33% chance of being chosen, while "b" would have a 66%
    # chance (1/2 ratio).
    # @param counts_hash [Hash{String => Integer}, nil] successor counts.
    # @return [String, nil] the chosen char, or nil when there are no
    #   successors to choose from (nil or empty input).
    def pick_random_char(counts_hash = {})
      # Callers pass @grams[key], which is nil for an unseen key; nil tells
      # the caller to stop extending the word.
      return nil if counts_hash.nil? || counts_hash.empty?

      pick_num = SecureRandom.rand(counts_hash.values.sum)
      counter = 0
      counts_hash.each do |char, count|
        counter += count
        # pick_num is 0-based (0...total), so the comparison must be strict
        # for each char to own exactly `count` of the `total` outcomes.
        return char if counter > pick_num
      end
      nil
    end

    # True when `word` contains a newline character.
    def line_ending?(word)
      word.include?("\n")
    end

    # Load the n-gram database from persistent storage, or build it from the
    # corpus and persist it when nothing has been stored yet.
    def set_grams
      stored = @data_store.retrieve_data
      return @grams = stored if stored

      # Only a freshly computed database needs writing back to disk.
      @grams = markov_corpus(@corpus_file, @gram_size)
      @data_store.store_data @grams
      @grams
    end
  end
end
|
data/lib/markov_words.rb
CHANGED
@@ -1,228 +1,8 @@
|
|
1
|
-
require
|
1
|
+
require 'markov_words/version'
|
2
|
+
require 'markov_words/words'
|
3
|
+
require 'markov_words/file_store'
|
2
4
|
require 'securerandom'
|
3
5
|
|
4
6
|
# @author Donald L. Merand
|
5
|
-
# A nice library for generating random words (not sentences) using Markov
|
6
|
-
# chains.
|
7
7
|
module MarkovWords
|
8
|
-
|
9
|
-
# This class takes care of word generation, caching, and data storage.
|
10
|
-
class Words
|
11
|
-
# Perform caching? Defaults to true.
|
12
|
-
attr :cache
|
13
|
-
# File location where you want to store the cache
|
14
|
-
attr :cache_file
|
15
|
-
# How many words you want to store in the cache?
|
16
|
-
attr :cache_size
|
17
|
-
# Your dictionary of words. Defaults to /usr/share/dict/words.
|
18
|
-
attr :corpus_file
|
19
|
-
# Where should your database be stored on disk?
|
20
|
-
attr :data_file
|
21
|
-
# The database of "grams" (word/count combinations), stored on the disk and
|
22
|
-
# loaded into this variable in memory when generating words.
|
23
|
-
attr :grams
|
24
|
-
# Number of n-grams to compute for your database. Defaults to 2
|
25
|
-
attr :gram_size
|
26
|
-
# Max generated word length. Defaults to 16
|
27
|
-
attr :max_length
|
28
|
-
# Minimum generated word length. Defaults to 3. NOTE: If your corpus size
|
29
|
-
# is very small (<1000 words or so), it's hard to guarantee a min_length
|
30
|
-
# because so many n-grams will have no association, which terminates word
|
31
|
-
# generation.
|
32
|
-
attr :min_length
|
33
|
-
|
34
|
-
# Create a new "Words" object
|
35
|
-
# @param opts [Hash] options sent to the object. Any of the object
|
36
|
-
# attributes (eg `:cache_file` or `:gram_size`) are valid parameters to
|
37
|
-
# add to the `opts` hash.
|
38
|
-
# @return [Words] A `MarkovWords::Words` object.
|
39
|
-
def initialize(opts = {})
|
40
|
-
@gram_size = opts.fetch :gram_size, 2
|
41
|
-
@max_length = opts.fetch :max_length, 16
|
42
|
-
@min_length = opts.fetch :min_length, 3
|
43
|
-
|
44
|
-
@cache = opts.fetch :cache, true
|
45
|
-
@cache_file = opts.fetch :cache_file,
|
46
|
-
"tmp/markov_words_#{@gram_size}.cache"
|
47
|
-
@cache_size = opts.fetch :cache_size, 70
|
48
|
-
@corpus_file = opts.fetch :corpus_file,
|
49
|
-
'/usr/share/dict/words'
|
50
|
-
@data_file = opts.fetch :data_file,
|
51
|
-
"tmp/markov_words_#{@gram_size}.data"
|
52
|
-
@grams = nil
|
53
|
-
end
|
54
|
-
|
55
|
-
# "Top off" the cache of stored words, and ensure that it's at
|
56
|
-
# `@cache_size`. If `@cache` is set to `false`, returns an empty array.
|
57
|
-
# @return [Array<String>] All words in the cache.
|
58
|
-
def refresh_cache
|
59
|
-
if @cache
|
60
|
-
words_array = load_from_file(@cache_file) || []
|
61
|
-
|
62
|
-
while words_array.length < @cache_size
|
63
|
-
words_array << generate_word
|
64
|
-
end
|
65
|
-
|
66
|
-
save_to_file(@cache_file, words_array)
|
67
|
-
words_array
|
68
|
-
else
|
69
|
-
[]
|
70
|
-
end
|
71
|
-
end
|
72
|
-
|
73
|
-
# Generate a new word, or return one from the cache if available.
|
74
|
-
# @return [String] The word.
|
75
|
-
def word
|
76
|
-
if @cache
|
77
|
-
load_word_from_cache
|
78
|
-
else
|
79
|
-
generate_word
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
private
|
84
|
-
|
85
|
-
def contains_vowel?(ary)
|
86
|
-
if ary.length < 2
|
87
|
-
true
|
88
|
-
else
|
89
|
-
ary.take(2).join.match(/[aeiou]/)
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
# Generates an English (by default)- sounding word.
|
94
|
-
def generate_word
|
95
|
-
set_grams if @grams.nil?
|
96
|
-
|
97
|
-
gram = ''
|
98
|
-
gram_array = []
|
99
|
-
|
100
|
-
# The word must be a random length, between @min and @max
|
101
|
-
desired_length = 0
|
102
|
-
until desired_length >= @min_length
|
103
|
-
desired_length = SecureRandom.rand(@max_length)
|
104
|
-
end
|
105
|
-
|
106
|
-
# Set initial array of chars, which is taken from the @grams key list. must
|
107
|
-
# contain a vowel in the first 2 chars (unless @gram_size == 1 in which
|
108
|
-
# case any letter).
|
109
|
-
all_grams_array = @grams.to_a
|
110
|
-
gram_min_length = @gram_size < @min_length ? @gram_size : @min_length
|
111
|
-
until gram_array.length >= gram_min_length && contains_vowel?(gram_array)
|
112
|
-
gram_array = all_grams_array.sample[0].chars
|
113
|
-
end
|
114
|
-
|
115
|
-
until gram_array.join.length == desired_length || gram.nil?
|
116
|
-
# grab last @gram_size (or possibly fewer if the array is too small)
|
117
|
-
# elements from the current gram_array, to use as the next key.
|
118
|
-
gal = gram_array.length
|
119
|
-
current_gram_size = gal >= @gram_size ? @gram_size : gal
|
120
|
-
key = gram_array[-current_gram_size..-1].join
|
121
|
-
|
122
|
-
gram = pick_random_char(@grams[key])
|
123
|
-
gram_array << gram
|
124
|
-
end
|
125
|
-
|
126
|
-
gram_array.join
|
127
|
-
end
|
128
|
-
|
129
|
-
def generate_words_array
|
130
|
-
@cache_size.times.map { generate_word }
|
131
|
-
end
|
132
|
-
|
133
|
-
def load_from_file(file)
|
134
|
-
result = nil
|
135
|
-
if File.exist?(file)
|
136
|
-
File.open(file, 'r') {|f| result = Marshal.load(f)}
|
137
|
-
end
|
138
|
-
result
|
139
|
-
end
|
140
|
-
|
141
|
-
def load_word_from_cache
|
142
|
-
words_array = load_from_file(@cache_file)
|
143
|
-
if words_array.nil? || words_array.empty?
|
144
|
-
words_array = generate_words_array
|
145
|
-
end
|
146
|
-
|
147
|
-
word = words_array.pop
|
148
|
-
save_to_file(@cache_file, words_array)
|
149
|
-
|
150
|
-
word
|
151
|
-
end
|
152
|
-
|
153
|
-
# Generate a MarkovWords corpus from a datafile, with a given size of n-gram.
|
154
|
-
# Returns a hash of "grams", which are a map of a letter to the frequency of
|
155
|
-
# the letters that follow it, eg: {"c" => {"a" => 1, "b" => 2}}
|
156
|
-
def markov_corpus(file, gram_size)
|
157
|
-
grams = {}
|
158
|
-
|
159
|
-
# Corpus contains a list of words, separated by newlines
|
160
|
-
File.foreach(file) do |word|
|
161
|
-
word = word.downcase.gsub(/-/, '')
|
162
|
-
gram_size.downto(1) do |current_size|
|
163
|
-
word.chars.each_cons(current_size + 1) do |gram|
|
164
|
-
first = gram[0..current_size - 1].join
|
165
|
-
second = gram[current_size]
|
166
|
-
|
167
|
-
unless first.empty? || second.empty? || is_line_ending?(second)
|
168
|
-
update_count(grams, first, second)
|
169
|
-
end
|
170
|
-
end
|
171
|
-
end
|
172
|
-
end
|
173
|
-
|
174
|
-
grams
|
175
|
-
end
|
176
|
-
|
177
|
-
# Given a hash in the format: {"c" => {"a" => 1, "b" => 2}}, grab a random
|
178
|
-
# element from the values hash, accurate to the distribution of counts.
|
179
|
-
# In the example hash above, "a" would have a 33% chance of being chosen,
|
180
|
-
# while "b" would have a 66% chance (1/2 ratio).
|
181
|
-
def pick_random_char(counts_hash = {})
|
182
|
-
if counts_hash.nil?
|
183
|
-
return nil
|
184
|
-
else
|
185
|
-
total = counts_hash.values.sum
|
186
|
-
pick_num = SecureRandom.rand(total)
|
187
|
-
counter = 0
|
188
|
-
counts_hash.each do |char, count|
|
189
|
-
counter += count
|
190
|
-
return char if counter >= pick_num
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
def is_line_ending?(word)
|
196
|
-
word.include?("\n")
|
197
|
-
end
|
198
|
-
|
199
|
-
# Marshal a Ruby object to file storage
|
200
|
-
def save_to_file(file, data)
|
201
|
-
File.open(file, 'wb') {|f| Marshal.dump(data, f)}
|
202
|
-
end
|
203
|
-
|
204
|
-
def set_grams
|
205
|
-
if File.exist? @data_file
|
206
|
-
@grams = load_from_file(@data_file)
|
207
|
-
else
|
208
|
-
@grams = markov_corpus(@corpus_file, @gram_size)
|
209
|
-
save_to_file(@data_file, @grams)
|
210
|
-
end
|
211
|
-
end
|
212
|
-
|
213
|
-
# Given a @grams entry, update the count of "second" in "first"
|
214
|
-
#
|
215
|
-
# Example:
|
216
|
-
# update_count({"a" => {"b" => 1}}, "a", "b")
|
217
|
-
# => {"a" => {"b" => 2}}
|
218
|
-
def update_count(grams, first, second)
|
219
|
-
grams[first] = {} if grams[first].nil?
|
220
|
-
if grams[first][second].nil?
|
221
|
-
grams[first][second] = 1
|
222
|
-
else
|
223
|
-
grams[first][second] += 1
|
224
|
-
end
|
225
|
-
end
|
226
|
-
|
227
|
-
end
|
228
8
|
end
|
data/markov_words.gemspec
CHANGED
@@ -1,29 +1,28 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require 'markov_words/version'
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
6
|
+
spec.name = 'markov_words'
|
8
7
|
spec.version = MarkovWords::VERSION
|
9
|
-
spec.authors = [
|
10
|
-
spec.email = [
|
8
|
+
spec.authors = ['Donald Merand']
|
9
|
+
spec.email = ['dmerand@explo.org']
|
11
10
|
|
12
|
-
spec.summary = %
|
13
|
-
spec.description = %
|
14
|
-
spec.homepage =
|
15
|
-
spec.license =
|
11
|
+
spec.summary = %{Generate words (not sentences) using Markov-chain techniques.}
|
12
|
+
spec.description = %{It's often nice to have random English-sounding words, eg. for password generators. This library uses Markov-chain techniques on words, as opposed to many others which focus on sentences.}
|
13
|
+
spec.homepage = 'https://github.com/exploration/markov_words'
|
14
|
+
spec.license = 'MIT'
|
16
15
|
|
17
16
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
18
17
|
f.match(%r{^(test|spec|features)/})
|
19
18
|
end
|
20
|
-
spec.bindir =
|
19
|
+
spec.bindir = 'exe'
|
21
20
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
22
|
-
spec.require_paths = [
|
21
|
+
spec.require_paths = ['lib']
|
23
22
|
|
24
|
-
spec.add_development_dependency
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
28
|
-
spec.add_development_dependency
|
23
|
+
spec.add_development_dependency 'bundler', '~> 1.16'
|
24
|
+
spec.add_development_dependency 'minitest', '~> 5.0'
|
25
|
+
spec.add_development_dependency 'pry', '~> 0.11'
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.0'
|
27
|
+
spec.add_development_dependency 'yard', '~> 0.6'
|
29
28
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markov_words
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Donald Merand
|
@@ -25,47 +25,47 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.16'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: minitest
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '5.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '5.0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: pry
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '0.11'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '0.11'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: rake
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '0
|
61
|
+
version: '10.0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '0
|
68
|
+
version: '10.0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: yard
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,9 +80,9 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.6'
|
83
|
-
description:
|
83
|
+
description: It's often nice to have random English-sounding words, eg. for password
|
84
84
|
generators. This library uses Markov-chain techniques on words, as opposed to many
|
85
|
-
others which focus on sentences.
|
85
|
+
others which focus on sentences.
|
86
86
|
email:
|
87
87
|
- dmerand@explo.org
|
88
88
|
executables: []
|
@@ -101,7 +101,9 @@ files:
|
|
101
101
|
- bin/console
|
102
102
|
- bin/setup
|
103
103
|
- lib/markov_words.rb
|
104
|
+
- lib/markov_words/file_store.rb
|
104
105
|
- lib/markov_words/version.rb
|
106
|
+
- lib/markov_words/words.rb
|
105
107
|
- markov_words.gemspec
|
106
108
|
homepage: https://github.com/exploration/markov_words
|
107
109
|
licenses:
|