markov_noodles 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/markov_noodles.rb +95 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 25c202c27f6207b467221cbf0d8a009ee19638f6
|
4
|
+
data.tar.gz: 747f38c46ad2775a86b10c6469d434597911aef4
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 6b097a3bb1b3973b4fd1780c5b8d3d1000918e4ef42dab87f7c2401b1f67cb86eb8cf2869778ba81b613d374c11a766ecde8060b590853ee1c00da5274c1693c
|
7
|
+
data.tar.gz: 9ff568d2373c0003c4e2982d6a0156815bbe672f0560f274c963d855521956a698d20c3a2d1be76534c0317ae15d4c0b9a2b1154abf417cfad199b8cc062e8c2
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'msgpack'
|
2
|
+
|
3
|
+
# Markov chain text generator.
#
# The dictionary maps an array of `depth` consecutive words to the list of
# words that were seen directly after them in the analysed text. Sentence
# starts are represented by nil entries in the key array.
class MarkovNoodles
  # A word ending with one of these characters closes a sentence.
  SENTENCE_ENDINGS = %w[. ? !].freeze
  private_constant :SENTENCE_ENDINGS

  attr_reader :dictionary
  attr_reader :depth

  # depth - number of preceding words used as the key when choosing the next
  #         word. Must be an Integer >= 1.
  #
  # Raises ArgumentError for any other depth: a depth of zero would make
  # generate_sentence loop forever, because the word window would always be
  # empty and no end word could ever be observed.
  def initialize(depth = 2)
    unless depth.is_a?(Integer) && depth >= 1
      raise ArgumentError, 'depth must be a positive integer'
    end

    @depth = depth
    @dictionary = {}
  end

  # Reads the whole file at `filename` and feeds its content to
  # analyse_string.
  def analyse_file(filename)
    analyse_string(File.read(filename))
  end

  # Adds every (preceding words -> next word) transition found in `text` to
  # the dictionary. Each call starts with an all-nil window, so the first word
  # of `text` is recorded as a sentence start. Returns nil.
  def analyse_string(text)
    current_words = Array.new(depth)
    split_text_to_array(text).each do |next_word|
      # The window is mutated below, so the dictionary key must be a copy.
      add_words(current_words.dup, next_word)
      current_words.push next_word
      current_words.shift
    end
    nil
  end

  # Serializes the dictionary with MessagePack into `filename`.
  # Returns the number of bytes written.
  def save_dictionary(filename)
    # MessagePack output is binary, so write it without newline or encoding
    # translation.
    File.binwrite(filename, @dictionary.to_msgpack)
  end

  # Replaces the current dictionary with the one stored in `filename`.
  #
  # Keys of the loaded dictionary are looked up with windows of `depth` words,
  # so the file should come from an instance using the same depth.
  def load_dictionary(filename)
    @dictionary = MessagePack.unpack(File.binread(filename))
  end

  # Returns `n` generated sentences joined with single spaces
  # (an empty string when `n` is zero or negative).
  def generate_n_sentences(n)
    n.times.map { generate_sentence }.join(' ')
  end

  # Generates one sentence by walking the dictionary from the sentence-start
  # window (all nils) until a word ending a sentence is produced.
  #
  # If the walk reaches a window with no recorded follower, a period is
  # appended to the last word and the sentence ends there. Returns an empty
  # string when the dictionary has no sentence-start entry.
  def generate_sentence
    current_words = Array.new(depth)
    sentence_array = []
    loop do
      new_word = current_words.last
      sentence_array.push new_word if new_word
      break if end_word?(new_word)

      next_word_options = @dictionary[current_words]
      if next_word_options.nil?
        # Dead end. Build a new string for the closing period instead of
        # mutating new_word: that object is shared with the dictionary, and
        # changing it in place would corrupt the stored keys and values.
        sentence_array[-1] = "#{new_word}." if new_word
        break
      end

      current_words.push next_word_options.sample
      current_words.shift
    end
    sentence_array.join(' ')
  end

  private

  # Splits a text into array and inserts @depth nils after each sentence.
  #
  # This way generated texts can start with any word that is at the beginning
  # of any sentence in analysed text, instead of always starting with the
  # first word from the text.
  def split_text_to_array(text)
    text.split.flat_map do |word|
      end_word?(word) ? [word, *Array.new(depth)] : [word]
    end
  end

  # Records that `followedby` was seen right after the `preceding` words.
  def add_words(preceding, followedby)
    (@dictionary[preceding] ||= []).push followedby
  end

  # Checks if word ends with one of following characters: . ? !
  # Returns false for nil.
  def end_word?(word)
    !word.nil? && word.end_with?(*SENTENCE_ENDINGS)
  end
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: markov_noodles
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Filip Defar
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-08-11 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Noodles uses Markov chains to generate superficially real-looking text
|
14
|
+
given a sample document.
|
15
|
+
email: filip.defar@gmail.com
|
16
|
+
executables: []
|
17
|
+
extensions: []
|
18
|
+
extra_rdoc_files: []
|
19
|
+
files:
|
20
|
+
- lib/markov_noodles.rb
|
21
|
+
homepage: https://github.com/dabrorius/noodles
|
22
|
+
licenses:
|
23
|
+
- MIT
|
24
|
+
metadata: {}
|
25
|
+
post_install_message:
|
26
|
+
rdoc_options: []
|
27
|
+
require_paths:
|
28
|
+
- lib
|
29
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
requirements: []
|
40
|
+
rubyforge_project:
|
41
|
+
rubygems_version: 2.4.5
|
42
|
+
signing_key:
|
43
|
+
specification_version: 4
|
44
|
+
summary: Simple markov chain implementation
|
45
|
+
test_files: []
|
46
|
+
has_rdoc:
|