markovite 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -1
- data/config/environment.rb +2 -1
- data/lib/markovite.rb +40 -15
- data/lib/markovite/chainer.rb +3 -2
- data/lib/markovite/dict.rb +29 -13
- data/lib/markovite/splitter.rb +7 -6
- data/lib/markovite/version.rb +1 -1
- data/markovite.gemspec +1 -1
- metadata +16 -4
- data/pkg/markovite-0.2.2.gem +0 -0
- data/pkg/markovite-0.2.3.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 844cdac9c79870dc34add5123467ba7de998324dbceb333c3358f45b96de15a5
|
4
|
+
data.tar.gz: 3321497a835c6df6a38e34b8f211ffa1dbd1916bfe152ae071b8210b54a79c18
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8d6a1d275ce80f2deef594d19baf7abd2a52652e6d3e5c034b06a0950777ef3f4ed62411c59c509f0e3e5767aac683b9f32d7075a4de26133588eb4b821042f5
|
7
|
+
data.tar.gz: bb8f7930db79db564bffeb2676200d559a26f202bcc4ed7d49d296b0ad41299d23575cc9e4f1dc0724430645e530d627d15d122379c09242522886843287ef9f
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
markovite (0.2.
|
4
|
+
markovite (0.2.5)
|
5
|
+
msgpack
|
5
6
|
|
6
7
|
GEM
|
7
8
|
remote: https://rubygems.org/
|
@@ -10,6 +11,7 @@ GEM
|
|
10
11
|
diff-lcs (1.3)
|
11
12
|
interception (0.5)
|
12
13
|
method_source (0.9.0)
|
14
|
+
msgpack (1.2.4)
|
13
15
|
pry (0.11.3)
|
14
16
|
coderay (~> 1.1.0)
|
15
17
|
method_source (~> 0.9.0)
|
data/config/environment.rb
CHANGED
data/lib/markovite.rb
CHANGED
@@ -11,9 +11,33 @@ module Markovite
|
|
11
11
|
MAX_FILENAME_LENGTH = 255
|
12
12
|
|
13
13
|
def initialize(filename = nil, dict_depth=DEFAULT_DEPTH)
|
14
|
+
initialize_children
|
14
15
|
parse_file(filename, dict_depth) if filename
|
15
16
|
end
|
16
17
|
|
18
|
+
def save(filename)
|
19
|
+
raise("Chain is empty") if dictionary.chain.empty?
|
20
|
+
msg_hash = {}
|
21
|
+
msg_hash["sentences"] = dictionary.sentences
|
22
|
+
msg_hash["chain"] = dictionary.chain
|
23
|
+
msg_hash["corpus"] = split.corpus
|
24
|
+
msg_hash["depth"] = dictionary.depth
|
25
|
+
File.open("#{filename}.msg", "w") do |file|
|
26
|
+
test = file.write(msg_hash.to_msgpack)
|
27
|
+
end
|
28
|
+
true
|
29
|
+
end
|
30
|
+
|
31
|
+
def load(filename)
|
32
|
+
raise("Invalid file type") if !is_valid_file_ext?(filename, /.msg\z/i)
|
33
|
+
data = File.read("#{filename}")
|
34
|
+
model = MessagePack.unpack(data)
|
35
|
+
@depth = model["depth"]
|
36
|
+
split.corpus = model["corpus"]
|
37
|
+
dictionary.sentences = model["sentences"]
|
38
|
+
dictionary.chain = model["chain"]
|
39
|
+
end
|
40
|
+
|
17
41
|
def self.combine(left_chain, right_chain, dict_depth = nil)
|
18
42
|
dict_depth = dict_depth || left_chain.depth
|
19
43
|
new_chain = Markovite::Chain.new
|
@@ -26,12 +50,13 @@ module Markovite
|
|
26
50
|
split.corpus
|
27
51
|
end
|
28
52
|
|
29
|
-
def parse_string(text, dict_depth=nil)
|
30
|
-
|
31
|
-
|
32
|
-
if chainer
|
53
|
+
def parse_string(text, dict_depth = nil)
|
54
|
+
if self.depth
|
55
|
+
depth_check(dict_depth)
|
33
56
|
add_from_text(text)
|
34
57
|
else
|
58
|
+
dict_depth = dict_depth || DEFAULT_DEPTH
|
59
|
+
is_valid_depth?(dict_depth)
|
35
60
|
new_from_text(text, dict_depth)
|
36
61
|
end
|
37
62
|
end
|
@@ -75,11 +100,7 @@ module Markovite
|
|
75
100
|
private
|
76
101
|
|
77
102
|
def depth_check(dict_depth)
|
78
|
-
|
79
|
-
raise "Chain depth conflict" if dict_depth != depth
|
80
|
-
elsif !is_valid_depth?(dict_depth)
|
81
|
-
raise "Chain depth must be between #{MIN_DEPTH} and #{MAX_DEPTH}"
|
82
|
-
end
|
103
|
+
raise "Chain depth conflict" if !dict_depth.nil? && dict_depth != depth
|
83
104
|
end
|
84
105
|
|
85
106
|
def is_valid_depth?(dict_depth)
|
@@ -91,16 +112,14 @@ module Markovite
|
|
91
112
|
end
|
92
113
|
|
93
114
|
def new_from_text(text, dict_depth)
|
94
|
-
#look into refactoring this
|
95
115
|
@depth = dict_depth
|
96
|
-
self.split = SplitSentence.new(text)
|
97
116
|
@corpus = split.corpus
|
98
|
-
|
99
|
-
self.chainer = Chainer.new(dictionary)
|
117
|
+
dictionary.expand_chain(text)
|
100
118
|
end
|
101
119
|
|
102
|
-
|
103
|
-
|
120
|
+
|
121
|
+
def is_valid_file_ext?(filename, ext = nil)
|
122
|
+
re = ext || Regexp.union(FILE_EXT)
|
104
123
|
filename.match(re)
|
105
124
|
end
|
106
125
|
|
@@ -108,6 +127,12 @@ module Markovite
|
|
108
127
|
str.length < 255 && split_words(str).length == 1
|
109
128
|
end
|
110
129
|
|
130
|
+
def initialize_children
|
131
|
+
self.split = SplitSentence.new
|
132
|
+
self.dictionary = Dictionary.new({sentence_split: split})
|
133
|
+
self.chainer = Chainer.new(dictionary)
|
134
|
+
end
|
135
|
+
|
111
136
|
def add_from_text(text)
|
112
137
|
dictionary.expand_chain(text)
|
113
138
|
end
|
data/lib/markovite/chainer.rb
CHANGED
@@ -90,8 +90,9 @@ class Chainer
|
|
90
90
|
|
91
91
|
def remove_markers(sentence)
|
92
92
|
#removes BEGINNING and ENDING markers
|
93
|
-
sentence.
|
94
|
-
sentence.
|
93
|
+
sentence.shift while sentence.first == BEGINNING
|
94
|
+
sentence.pop while sentence.last == ENDING
|
95
|
+
sentence
|
95
96
|
end
|
96
97
|
|
97
98
|
def is_valid_sentence?(sentence)
|
data/lib/markovite/dict.rb
CHANGED
@@ -6,26 +6,33 @@ class Dictionary
|
|
6
6
|
#make this a module???
|
7
7
|
BEGINNING = "__BEGIN__"
|
8
8
|
ENDING = "__END__"
|
9
|
+
DEFAULT_DEPTH = 2
|
9
10
|
|
10
|
-
attr_accessor :
|
11
|
-
attr_reader :depth
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# as the default value
|
17
|
-
self.chain = Hash.new { |h, k| h[k] = [] }
|
18
|
-
self.sentences = sentence_split.split_text
|
19
|
-
@depth = depth
|
20
|
-
construct_chain
|
11
|
+
attr_accessor :sentence_split, :sentences
|
12
|
+
attr_reader :depth, :chain
|
13
|
+
|
14
|
+
def initialize(attributes)
|
15
|
+
attributes.each {|attribute, value| self.send("#{attribute}=", value)}
|
16
|
+
set_default
|
21
17
|
end
|
22
18
|
|
23
19
|
def has_sentence(sentence)
|
24
20
|
sentences.include?(sentence)
|
25
21
|
end
|
26
22
|
|
23
|
+
def chain=(arg)
|
24
|
+
# The following line ensures a new array is created for each new key
|
25
|
+
# instead of using the memory address of the first array created
|
26
|
+
# as the default value
|
27
|
+
@chain = Hash.new { |h, k| h[k] = [] } if self.chain.nil?
|
28
|
+
arg.each {|key, value|chain[key] = value}
|
29
|
+
chain
|
30
|
+
end
|
31
|
+
|
27
32
|
def depth=(arg)
|
28
|
-
raise "Depth cannot be changed"
|
33
|
+
raise "Depth cannot be changed" if depth
|
34
|
+
raise "Depth must be integer" if arg.class != Integer
|
35
|
+
@depth = arg
|
29
36
|
end
|
30
37
|
|
31
38
|
def expand_chain(text)
|
@@ -36,8 +43,8 @@ class Dictionary
|
|
36
43
|
end
|
37
44
|
|
38
45
|
def construct_chain(new_sentences = nil)
|
46
|
+
self.depth = DEFAULT_DEPTH if depth.nil?
|
39
47
|
new_sentences = new_sentences || sentences
|
40
|
-
raise "No sentences in memory" if new_sentences.empty?
|
41
48
|
new_sentences.each do |sentence|
|
42
49
|
words = sentence.split(" ")
|
43
50
|
# each chunk is an array that represents a state in the markov chain
|
@@ -65,4 +72,13 @@ class Dictionary
|
|
65
72
|
sentences.clear
|
66
73
|
end
|
67
74
|
|
75
|
+
private
|
76
|
+
|
77
|
+
def set_default
|
78
|
+
self.sentence_split = sentence_split || SentenceSplit.new
|
79
|
+
self.chain = chain || {}
|
80
|
+
self.sentences = sentences || sentence_split.split_text
|
81
|
+
construct_chain if chain.empty?
|
82
|
+
end
|
83
|
+
|
68
84
|
end
|
data/lib/markovite/splitter.rb
CHANGED
@@ -17,13 +17,14 @@ class SplitSentence
|
|
17
17
|
"prof.", "rev.", "rt. hon.", "sr.", "st." #personal
|
18
18
|
]
|
19
19
|
|
20
|
-
|
20
|
+
attr_reader :corpus
|
21
21
|
|
22
|
-
|
22
|
+
def initialize(corpus = nil)
|
23
|
+
self.corpus = corpus || ""
|
24
|
+
end
|
23
25
|
|
24
|
-
def
|
25
|
-
|
26
|
-
split_text
|
26
|
+
def corpus=(text)
|
27
|
+
@corpus = text.dup
|
27
28
|
end
|
28
29
|
|
29
30
|
# We will want to change this to something that splits the words into an
|
@@ -58,7 +59,7 @@ class SplitSentence
|
|
58
59
|
end
|
59
60
|
|
60
61
|
def expand_corpus(text)
|
61
|
-
self.corpus += "
|
62
|
+
self.corpus += " #{text}"
|
62
63
|
end
|
63
64
|
|
64
65
|
private
|
data/lib/markovite/version.rb
CHANGED
data/markovite.gemspec
CHANGED
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
|
|
28
28
|
end
|
29
29
|
|
30
30
|
spec.require_paths = ["lib", "config"]
|
31
|
-
|
31
|
+
spec.add_runtime_dependency "msgpack"
|
32
32
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
33
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
34
|
spec.add_development_dependency "rspec", "~> 3.0"
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markovite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexandra Wright
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: msgpack
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -106,8 +120,6 @@ files:
|
|
106
120
|
- lib/markovite/splitter.rb
|
107
121
|
- lib/markovite/version.rb
|
108
122
|
- markovite.gemspec
|
109
|
-
- pkg/markovite-0.2.2.gem
|
110
|
-
- pkg/markovite-0.2.3.gem
|
111
123
|
homepage: https://github.com/f3mshep/ruby_markovify
|
112
124
|
licenses:
|
113
125
|
- MIT
|
data/pkg/markovite-0.2.2.gem
DELETED
Binary file
|
data/pkg/markovite-0.2.3.gem
DELETED
Binary file
|