markovite 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -1
- data/config/environment.rb +2 -1
- data/lib/markovite.rb +40 -15
- data/lib/markovite/chainer.rb +3 -2
- data/lib/markovite/dict.rb +29 -13
- data/lib/markovite/splitter.rb +7 -6
- data/lib/markovite/version.rb +1 -1
- data/markovite.gemspec +1 -1
- metadata +16 -4
- data/pkg/markovite-0.2.2.gem +0 -0
- data/pkg/markovite-0.2.3.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 844cdac9c79870dc34add5123467ba7de998324dbceb333c3358f45b96de15a5
|
4
|
+
data.tar.gz: 3321497a835c6df6a38e34b8f211ffa1dbd1916bfe152ae071b8210b54a79c18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d6a1d275ce80f2deef594d19baf7abd2a52652e6d3e5c034b06a0950777ef3f4ed62411c59c509f0e3e5767aac683b9f32d7075a4de26133588eb4b821042f5
|
7
|
+
data.tar.gz: bb8f7930db79db564bffeb2676200d559a26f202bcc4ed7d49d296b0ad41299d23575cc9e4f1dc0724430645e530d627d15d122379c09242522886843287ef9f
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
markovite (0.2.
|
4
|
+
markovite (0.2.5)
|
5
|
+
msgpack
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -10,6 +11,7 @@ GEM
|
|
10
11
|
diff-lcs (1.3)
|
11
12
|
interception (0.5)
|
12
13
|
method_source (0.9.0)
|
14
|
+
msgpack (1.2.4)
|
13
15
|
pry (0.11.3)
|
14
16
|
coderay (~> 1.1.0)
|
15
17
|
method_source (~> 0.9.0)
|
data/config/environment.rb
CHANGED
data/lib/markovite.rb
CHANGED
@@ -11,9 +11,33 @@ module Markovite
|
|
11
11
|
MAX_FILENAME_LENGTH = 255
|
12
12
|
|
13
13
|
def initialize(filename = nil, dict_depth=DEFAULT_DEPTH)
|
14
|
+
initialize_children
|
14
15
|
parse_file(filename, dict_depth) if filename
|
15
16
|
end
|
16
17
|
|
18
|
+
def save(filename)
|
19
|
+
raise("Chain is empty") if dictionary.chain.empty?
|
20
|
+
msg_hash = {}
|
21
|
+
msg_hash["sentences"] = dictionary.sentences
|
22
|
+
msg_hash["chain"] = dictionary.chain
|
23
|
+
msg_hash["corpus"] = split.corpus
|
24
|
+
msg_hash["depth"] = dictionary.depth
|
25
|
+
File.open("#{filename}.msg", "w") do |file|
|
26
|
+
test = file.write(msg_hash.to_msgpack)
|
27
|
+
end
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
def load(filename)
|
32
|
+
raise("Invalid file type") if !is_valid_file_ext?(filename, /.msg\z/i)
|
33
|
+
data = File.read("#{filename}")
|
34
|
+
model = MessagePack.unpack(data)
|
35
|
+
@depth = model["depth"]
|
36
|
+
split.corpus = model["corpus"]
|
37
|
+
dictionary.sentences = model["sentences"]
|
38
|
+
dictionary.chain = model["chain"]
|
39
|
+
end
|
40
|
+
|
17
41
|
def self.combine(left_chain, right_chain, dict_depth = nil)
|
18
42
|
dict_depth = dict_depth || left_chain.depth
|
19
43
|
new_chain = Markovite::Chain.new
|
@@ -26,12 +50,13 @@ module Markovite
|
|
26
50
|
split.corpus
|
27
51
|
end
|
28
52
|
|
29
|
-
def parse_string(text, dict_depth=nil)
|
30
|
-
|
31
|
-
|
32
|
-
if chainer
|
53
|
+
def parse_string(text, dict_depth = nil)
|
54
|
+
if self.depth
|
55
|
+
depth_check(dict_depth)
|
33
56
|
add_from_text(text)
|
34
57
|
else
|
58
|
+
dict_depth = dict_depth || DEFAULT_DEPTH
|
59
|
+
is_valid_depth?(dict_depth)
|
35
60
|
new_from_text(text, dict_depth)
|
36
61
|
end
|
37
62
|
end
|
@@ -75,11 +100,7 @@ module Markovite
|
|
75
100
|
private
|
76
101
|
|
77
102
|
def depth_check(dict_depth)
|
78
|
-
|
79
|
-
raise "Chain depth conflict" if dict_depth != depth
|
80
|
-
elsif !is_valid_depth?(dict_depth)
|
81
|
-
raise "Chain depth must be between #{MIN_DEPTH} and #{MAX_DEPTH}"
|
82
|
-
end
|
103
|
+
raise "Chain depth conflict" if !dict_depth.nil? && dict_depth != depth
|
83
104
|
end
|
84
105
|
|
85
106
|
def is_valid_depth?(dict_depth)
|
@@ -91,16 +112,14 @@ module Markovite
|
|
91
112
|
end
|
92
113
|
|
93
114
|
def new_from_text(text, dict_depth)
|
94
|
-
#look into refactoring this
|
95
115
|
@depth = dict_depth
|
96
|
-
self.split = SplitSentence.new(text)
|
97
116
|
@corpus = split.corpus
|
98
|
-
|
99
|
-
self.chainer = Chainer.new(dictionary)
|
117
|
+
dictionary.expand_chain(text)
|
100
118
|
end
|
101
119
|
|
102
|
-
|
103
|
-
|
120
|
+
|
121
|
+
def is_valid_file_ext?(filename, ext = nil)
|
122
|
+
re = ext || Regexp.union(FILE_EXT)
|
104
123
|
filename.match(re)
|
105
124
|
end
|
106
125
|
|
@@ -108,6 +127,12 @@ module Markovite
|
|
108
127
|
str.length < 255 && split_words(str).length == 1
|
109
128
|
end
|
110
129
|
|
130
|
+
def initialize_children
|
131
|
+
self.split = SplitSentence.new
|
132
|
+
self.dictionary = Dictionary.new({sentence_split: split})
|
133
|
+
self.chainer = Chainer.new(dictionary)
|
134
|
+
end
|
135
|
+
|
111
136
|
def add_from_text(text)
|
112
137
|
dictionary.expand_chain(text)
|
113
138
|
end
|
data/lib/markovite/chainer.rb
CHANGED
@@ -90,8 +90,9 @@ class Chainer
|
|
90
90
|
|
91
91
|
def remove_markers(sentence)
|
92
92
|
#removes BEGINNING and ENDING markers
|
93
|
-
sentence.
|
94
|
-
sentence.
|
93
|
+
sentence.shift while sentence.first == BEGINNING
|
94
|
+
sentence.pop while sentence.last == ENDING
|
95
|
+
sentence
|
95
96
|
end
|
96
97
|
|
97
98
|
def is_valid_sentence?(sentence)
|
data/lib/markovite/dict.rb
CHANGED
@@ -6,26 +6,33 @@ class Dictionary
|
|
6
6
|
#make this a module???
|
7
7
|
BEGINNING = "__BEGIN__"
|
8
8
|
ENDING = "__END__"
|
9
|
+
DEFAULT_DEPTH = 2
|
9
10
|
|
10
|
-
attr_accessor :
|
11
|
-
attr_reader :depth
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# as the default value
|
17
|
-
self.chain = Hash.new { |h, k| h[k] = [] }
|
18
|
-
self.sentences = sentence_split.split_text
|
19
|
-
@depth = depth
|
20
|
-
construct_chain
|
11
|
+
attr_accessor :sentence_split, :sentences
|
12
|
+
attr_reader :depth, :chain
|
13
|
+
|
14
|
+
def initialize(attributes)
|
15
|
+
attributes.each {|attribute, value| self.send("#{attribute}=", value)}
|
16
|
+
set_default
|
21
17
|
end
|
22
18
|
|
23
19
|
def has_sentence(sentence)
|
24
20
|
sentences.include?(sentence)
|
25
21
|
end
|
26
22
|
|
23
|
+
def chain=(arg)
|
24
|
+
# The following line ensures a new array is created for each new key
|
25
|
+
# instead of using the memory address of the first array created
|
26
|
+
# as the default value
|
27
|
+
@chain = Hash.new { |h, k| h[k] = [] } if self.chain.nil?
|
28
|
+
arg.each {|key, value|chain[key] = value}
|
29
|
+
chain
|
30
|
+
end
|
31
|
+
|
27
32
|
def depth=(arg)
|
28
|
-
raise "Depth cannot be changed"
|
33
|
+
raise "Depth cannot be changed" if depth
|
34
|
+
raise "Depth must be integer" if arg.class != Integer
|
35
|
+
@depth = arg
|
29
36
|
end
|
30
37
|
|
31
38
|
def expand_chain(text)
|
@@ -36,8 +43,8 @@ class Dictionary
|
|
36
43
|
end
|
37
44
|
|
38
45
|
def construct_chain(new_sentences = nil)
|
46
|
+
self.depth = DEFAULT_DEPTH if depth.nil?
|
39
47
|
new_sentences = new_sentences || sentences
|
40
|
-
raise "No sentences in memory" if new_sentences.empty?
|
41
48
|
new_sentences.each do |sentence|
|
42
49
|
words = sentence.split(" ")
|
43
50
|
# each chunk is an array that represents a state in the markov chain
|
@@ -65,4 +72,13 @@ class Dictionary
|
|
65
72
|
sentences.clear
|
66
73
|
end
|
67
74
|
|
75
|
+
private
|
76
|
+
|
77
|
+
def set_default
|
78
|
+
self.sentence_split = sentence_split || SentenceSplit.new
|
79
|
+
self.chain = chain || {}
|
80
|
+
self.sentences = sentences || sentence_split.split_text
|
81
|
+
construct_chain if chain.empty?
|
82
|
+
end
|
83
|
+
|
68
84
|
end
|
data/lib/markovite/splitter.rb
CHANGED
@@ -17,13 +17,14 @@ class SplitSentence
|
|
17
17
|
"prof.", "rev.", "rt. hon.", "sr.", "st." #personal
|
18
18
|
]
|
19
19
|
|
20
|
-
|
20
|
+
attr_reader :corpus
|
21
21
|
|
22
|
-
|
22
|
+
def initialize(corpus = nil)
|
23
|
+
self.corpus = corpus || ""
|
24
|
+
end
|
23
25
|
|
24
|
-
def
|
25
|
-
|
26
|
-
split_text
|
26
|
+
def corpus=(text)
|
27
|
+
@corpus = text.dup
|
27
28
|
end
|
28
29
|
|
29
30
|
# We will want to change this to something that splits the words into an
|
@@ -58,7 +59,7 @@ class SplitSentence
|
|
58
59
|
end
|
59
60
|
|
60
61
|
def expand_corpus(text)
|
61
|
-
self.corpus += "
|
62
|
+
self.corpus += " #{text}"
|
62
63
|
end
|
63
64
|
|
64
65
|
private
|
data/lib/markovite/version.rb
CHANGED
data/markovite.gemspec
CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
end
|
29
29
|
|
30
30
|
spec.require_paths = ["lib", "config"]
|
31
|
-
|
31
|
+
spec.add_runtime_dependency "msgpack"
|
32
32
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
33
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
34
|
spec.add_development_dependency "rspec", "~> 3.0"
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markovite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexandra Wright
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: msgpack
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,8 +120,6 @@ files:
|
|
106
120
|
- lib/markovite/splitter.rb
|
107
121
|
- lib/markovite/version.rb
|
108
122
|
- markovite.gemspec
|
109
|
-
- pkg/markovite-0.2.2.gem
|
110
|
-
- pkg/markovite-0.2.3.gem
|
111
123
|
homepage: https://github.com/f3mshep/ruby_markovify
|
112
124
|
licenses:
|
113
125
|
- MIT
|
data/pkg/markovite-0.2.2.gem
DELETED
Binary file
|
data/pkg/markovite-0.2.3.gem
DELETED
Binary file
|