markovite 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a45b08ff10130126c3f0325fca5a1a1d23d31b942e0430d088f73b6ab812faac
4
- data.tar.gz: 698a6acf00459bb8f61f8325e48f2c096b36af8de6890126d2e68c2a0d994627
3
+ metadata.gz: 844cdac9c79870dc34add5123467ba7de998324dbceb333c3358f45b96de15a5
4
+ data.tar.gz: 3321497a835c6df6a38e34b8f211ffa1dbd1916bfe152ae071b8210b54a79c18
5
5
  SHA512:
6
- metadata.gz: d2579a001419cf3ca5913d8d55b7d05b98f5628de91c7b128b9276d58fc22cb2fded0237dc0852df1f2c4c15c1d461ac7e4f522fca1d002c9cae254ede2eb4fc
7
- data.tar.gz: e5a2b74db3c68cde040a1d935d9c62237d6f076fa2d82893cdcbed0a7b42171ca8e47d71c76bcff15c7c663ef1edd1e231e5dbd1353dc78a2d8023d246c4c381
6
+ metadata.gz: 8d6a1d275ce80f2deef594d19baf7abd2a52652e6d3e5c034b06a0950777ef3f4ed62411c59c509f0e3e5767aac683b9f32d7075a4de26133588eb4b821042f5
7
+ data.tar.gz: bb8f7930db79db564bffeb2676200d559a26f202bcc4ed7d49d296b0ad41299d23575cc9e4f1dc0724430645e530d627d15d122379c09242522886843287ef9f
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- markovite (0.2.4)
4
+ markovite (0.2.5)
5
+ msgpack
5
6
 
6
7
  GEM
7
8
  remote: https://rubygems.org/
@@ -10,6 +11,7 @@ GEM
10
11
  diff-lcs (1.3)
11
12
  interception (0.5)
12
13
  method_source (0.9.0)
14
+ msgpack (1.2.4)
13
15
  pry (0.11.3)
14
16
  coderay (~> 1.1.0)
15
17
  method_source (~> 0.9.0)
@@ -1,4 +1,5 @@
1
1
  require_relative "../lib/markovite/chainer.rb"
2
2
  require_relative "../lib/markovite/dict.rb"
3
3
  require_relative "../lib/markovite/splitter.rb"
4
- require_relative "../lib/markovite.rb"
4
+ require_relative "../lib/markovite.rb"
5
+ require "msgpack"
@@ -11,9 +11,33 @@ module Markovite
11
11
  MAX_FILENAME_LENGTH = 255
12
12
 
13
13
  def initialize(filename = nil, dict_depth=DEFAULT_DEPTH)
14
+ initialize_children
14
15
  parse_file(filename, dict_depth) if filename
15
16
  end
16
17
 
18
+ def save(filename)
19
+ raise("Chain is empty") if dictionary.chain.empty?
20
+ msg_hash = {}
21
+ msg_hash["sentences"] = dictionary.sentences
22
+ msg_hash["chain"] = dictionary.chain
23
+ msg_hash["corpus"] = split.corpus
24
+ msg_hash["depth"] = dictionary.depth
25
+ File.open("#{filename}.msg", "w") do |file|
26
+ test = file.write(msg_hash.to_msgpack)
27
+ end
28
+ true
29
+ end
30
+
31
+ def load(filename)
32
+ raise("Invalid file type") if !is_valid_file_ext?(filename, /.msg\z/i)
33
+ data = File.read("#{filename}")
34
+ model = MessagePack.unpack(data)
35
+ @depth = model["depth"]
36
+ split.corpus = model["corpus"]
37
+ dictionary.sentences = model["sentences"]
38
+ dictionary.chain = model["chain"]
39
+ end
40
+
17
41
  def self.combine(left_chain, right_chain, dict_depth = nil)
18
42
  dict_depth = dict_depth || left_chain.depth
19
43
  new_chain = Markovite::Chain.new
@@ -26,12 +50,13 @@ module Markovite
26
50
  split.corpus
27
51
  end
28
52
 
29
- def parse_string(text, dict_depth=nil)
30
- dict_depth = dict_depth || self.depth || DEFAULT_DEPTH
31
- depth_check(dict_depth)
32
- if chainer
53
+ def parse_string(text, dict_depth = nil)
54
+ if self.depth
55
+ depth_check(dict_depth)
33
56
  add_from_text(text)
34
57
  else
58
+ dict_depth = dict_depth || DEFAULT_DEPTH
59
+ is_valid_depth?(dict_depth)
35
60
  new_from_text(text, dict_depth)
36
61
  end
37
62
  end
@@ -75,11 +100,7 @@ module Markovite
75
100
  private
76
101
 
77
102
  def depth_check(dict_depth)
78
- if chainer
79
- raise "Chain depth conflict" if dict_depth != depth
80
- elsif !is_valid_depth?(dict_depth)
81
- raise "Chain depth must be between #{MIN_DEPTH} and #{MAX_DEPTH}"
82
- end
103
+ raise "Chain depth conflict" if !dict_depth.nil? && dict_depth != depth
83
104
  end
84
105
 
85
106
  def is_valid_depth?(dict_depth)
@@ -91,16 +112,14 @@ module Markovite
91
112
  end
92
113
 
93
114
  def new_from_text(text, dict_depth)
94
- #look into refactoring this
95
115
  @depth = dict_depth
96
- self.split = SplitSentence.new(text)
97
116
  @corpus = split.corpus
98
- self.dictionary = Dictionary.new(split, depth)
99
- self.chainer = Chainer.new(dictionary)
117
+ dictionary.expand_chain(text)
100
118
  end
101
119
 
102
- def is_valid_file_ext?(filename)
103
- re = Regexp.union(FILE_EXT)
120
+
121
+ def is_valid_file_ext?(filename, ext = nil)
122
+ re = ext || Regexp.union(FILE_EXT)
104
123
  filename.match(re)
105
124
  end
106
125
 
@@ -108,6 +127,12 @@ module Markovite
108
127
  str.length < 255 && split_words(str).length == 1
109
128
  end
110
129
 
130
+ def initialize_children
131
+ self.split = SplitSentence.new
132
+ self.dictionary = Dictionary.new({sentence_split: split})
133
+ self.chainer = Chainer.new(dictionary)
134
+ end
135
+
111
136
  def add_from_text(text)
112
137
  dictionary.expand_chain(text)
113
138
  end
@@ -90,8 +90,9 @@ class Chainer
90
90
 
91
91
  def remove_markers(sentence)
92
92
  #removes BEGINNING and ENDING markers
93
- sentence.pop
94
- sentence.shift(depth)
93
+ sentence.shift while sentence.first == BEGINNING
94
+ sentence.pop while sentence.last == ENDING
95
+ sentence
95
96
  end
96
97
 
97
98
  def is_valid_sentence?(sentence)
@@ -6,26 +6,33 @@ class Dictionary
6
6
  #make this a module???
7
7
  BEGINNING = "__BEGIN__"
8
8
  ENDING = "__END__"
9
+ DEFAULT_DEPTH = 2
9
10
 
10
- attr_accessor :chain, :sentence_split, :sentences
11
- attr_reader :depth
12
- def initialize(sentence_split = nil, depth = 2)
13
- self.sentence_split = sentence_split || SentenceSplit.new
14
- # The following line ensures a new array is created for each new key
15
- # instead of using the memory address of the first array created raise exception "First argument must contain a SplitSentence instance" if sentence_split.class != SentenceSplit
16
- # as the default value
17
- self.chain = Hash.new { |h, k| h[k] = [] }
18
- self.sentences = sentence_split.split_text
19
- @depth = depth
20
- construct_chain
11
+ attr_accessor :sentence_split, :sentences
12
+ attr_reader :depth, :chain
13
+
14
+ def initialize(attributes)
15
+ attributes.each {|attribute, value| self.send("#{attribute}=", value)}
16
+ set_default
21
17
  end
22
18
 
23
19
  def has_sentence(sentence)
24
20
  sentences.include?(sentence)
25
21
  end
26
22
 
23
+ def chain=(arg)
24
+ # The following line ensures a new array is created for each new key
25
+ # instead of using the memory address of the first array created
26
+ # as the default value
27
+ @chain = Hash.new { |h, k| h[k] = [] } if self.chain.nil?
28
+ arg.each {|key, value|chain[key] = value}
29
+ chain
30
+ end
31
+
27
32
  def depth=(arg)
28
- raise "Depth cannot be changed"
33
+ raise "Depth cannot be changed" if depth
34
+ raise "Depth must be integer" if arg.class != Integer
35
+ @depth = arg
29
36
  end
30
37
 
31
38
  def expand_chain(text)
@@ -36,8 +43,8 @@ class Dictionary
36
43
  end
37
44
 
38
45
  def construct_chain(new_sentences = nil)
46
+ self.depth = DEFAULT_DEPTH if depth.nil?
39
47
  new_sentences = new_sentences || sentences
40
- raise "No sentences in memory" if new_sentences.empty?
41
48
  new_sentences.each do |sentence|
42
49
  words = sentence.split(" ")
43
50
  # each chunk is an array that represents a state in the markov chain
@@ -65,4 +72,13 @@ class Dictionary
65
72
  sentences.clear
66
73
  end
67
74
 
75
+ private
76
+
77
+ def set_default
78
+ self.sentence_split = sentence_split || SentenceSplit.new
79
+ self.chain = chain || {}
80
+ self.sentences = sentences || sentence_split.split_text
81
+ construct_chain if chain.empty?
82
+ end
83
+
68
84
  end
@@ -17,13 +17,14 @@ class SplitSentence
17
17
  "prof.", "rev.", "rt. hon.", "sr.", "st." #personal
18
18
  ]
19
19
 
20
- #look into detecting abbreviations!
20
+ attr_reader :corpus
21
21
 
22
- attr_accessor :corpus
22
+ def initialize(corpus = nil)
23
+ self.corpus = corpus || ""
24
+ end
23
25
 
24
- def initialize(corpus = "")
25
- self.corpus = corpus.dup
26
- split_text
26
+ def corpus=(text)
27
+ @corpus = text.dup
27
28
  end
28
29
 
29
30
  # We will want to change this to something that splits the words into an
@@ -58,7 +59,7 @@ class SplitSentence
58
59
  end
59
60
 
60
61
  def expand_corpus(text)
61
- self.corpus += "\n" + text
62
+ self.corpus += " #{text}"
62
63
  end
63
64
 
64
65
  private
@@ -1,3 +1,3 @@
1
1
  module Markovite
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.5"
3
3
  end
@@ -28,7 +28,7 @@ Gem::Specification.new do |spec|
28
28
  end
29
29
 
30
30
  spec.require_paths = ["lib", "config"]
31
-
31
+ spec.add_runtime_dependency "msgpack"
32
32
  spec.add_development_dependency "bundler", "~> 1.16"
33
33
  spec.add_development_dependency "rake", "~> 10.0"
34
34
  spec.add_development_dependency "rspec", "~> 3.0"
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markovite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alexandra Wright
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-02 00:00:00.000000000 Z
11
+ date: 2018-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: msgpack
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -106,8 +120,6 @@ files:
106
120
  - lib/markovite/splitter.rb
107
121
  - lib/markovite/version.rb
108
122
  - markovite.gemspec
109
- - pkg/markovite-0.2.2.gem
110
- - pkg/markovite-0.2.3.gem
111
123
  homepage: https://github.com/f3mshep/ruby_markovify
112
124
  licenses:
113
125
  - MIT
Binary file
Binary file