markovite 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +8 -2
- data/README.md +46 -4
- data/lib/markovite.rb +5 -3
- data/lib/markovite/chainer.rb +12 -4
- data/lib/markovite/dict.rb +2 -2
- data/lib/markovite/splitter.rb +48 -23
- data/lib/markovite/version.rb +1 -1
- data/markovite.gemspec +2 -1
- metadata +20 -7
- data/pkg/markovite-0.1.0.gem +0 -0
- data/pkg/markovite-0.2.0.gem +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b6fee92d46759244fd174b5292ae9d7f5b418f25f9268922b9e1c9bc0d4d08fc
|
4
|
+
data.tar.gz: 12601a0899b487e1fae3415ad3cb12b7107d5961cf06f9d32dda38bd6968cb13
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15f2a32d7a25bae0bcb2b085ab548bc156fdc49cb50d9b196e731bd9d9b214e216f9ffd2f0c6570516e966907f1eb06172879b5d718661ab9b7605c25354803f
|
7
|
+
data.tar.gz: 6f39d34312712dbc3dee5070be285adfef7f98bc181def7ffc6d46b408864077d69e159cef2cd54638de61de3419193e6487d88c4d5bb9abbf0ed5e61406ecfb
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,12 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
markovite (0.2.
|
4
|
+
markovite (0.2.2)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
8
8
|
specs:
|
9
|
+
coderay (1.1.2)
|
9
10
|
diff-lcs (1.3)
|
11
|
+
method_source (0.9.0)
|
12
|
+
pry (0.11.3)
|
13
|
+
coderay (~> 1.1.0)
|
14
|
+
method_source (~> 0.9.0)
|
10
15
|
rake (10.4.2)
|
11
16
|
rspec (3.7.0)
|
12
17
|
rspec-core (~> 3.7.0)
|
@@ -28,8 +33,9 @@ PLATFORMS
|
|
28
33
|
DEPENDENCIES
|
29
34
|
bundler (~> 1.16)
|
30
35
|
markovite!
|
36
|
+
pry
|
31
37
|
rake (~> 10.0)
|
32
38
|
rspec (~> 3.0)
|
33
39
|
|
34
40
|
BUNDLED WITH
|
35
|
-
1.16.
|
41
|
+
1.16.1
|
data/README.md
CHANGED
@@ -1,8 +1,6 @@
|
|
1
1
|
# Markovite
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
TODO: Delete this and the text above, and describe your gem
|
3
|
+
Markovite is a Markov chain generator that is simple to use and easy to hack. You can use this gem to generate random text from a corpus.
|
6
4
|
|
7
5
|
## Installation
|
8
6
|
|
@@ -22,7 +20,51 @@ Or install it yourself as:
|
|
22
20
|
|
23
21
|
## Usage
|
24
22
|
|
25
|
-
|
23
|
+
To hit the ground running, you can do the following:
|
24
|
+
|
25
|
+
```ruby
|
26
|
+
require "markovite"
|
27
|
+
|
28
|
+
chain = Markovite::Chain.new
|
29
|
+
chain << "tiny-shakespeare.txt"
|
30
|
+
|
31
|
+
chain.make_sentence_of_length(140)
|
32
|
+
|
33
|
+
```
|
34
|
+
This will train the chain model on a file named `tiny-shakespeare.txt` and return a sentence that is no longer than 140 characters and does not appear in the training corpus.
|
35
|
+
|
36
|
+
You can push multiple files or strings into one chain instance. By default, chains will be initialized with a depth of 2.
|
37
|
+
|
38
|
+
### Expanding Chain
|
39
|
+
A new chain instance can optionally be initialized with
|
40
|
+
a filename as the first argument, and the desired depth size as the second argument.
|
41
|
+
|
42
|
+
`chain = Markovite::Chain.new("tiny-shakespeare.txt", 3)`
|
43
|
+
|
44
|
+
Instances of chains can be modified by using the more specific methods
|
45
|
+
`chain.parse_file("sherlock.txt", 3)`
|
46
|
+
or
|
47
|
+
`chain.parse_string("I am a giant hamster person")`
|
48
|
+
The second argument, the chain depth, will default to 2.
|
49
|
+
|
50
|
+
### Creating Sentences
|
51
|
+
|
52
|
+
`chain.make_sentence`
|
53
|
+
|
54
|
+
Returns one sentence that does not appear in the corpus, or nil if the model is unable to generate a unique sentence.
|
55
|
+
|
56
|
+
`chain.make_sentences(5)`
|
57
|
+
|
58
|
+
Returns five sentences that do not appear in the corpus, or nil if the model is unable to generate a unique sentence.
|
59
|
+
|
60
|
+
`chain.make_sentence_starts_with("Hello listeners")`
|
61
|
+
|
62
|
+
Returns a sentence that begins with the argument, or nil if the model is unable to generate a unique sentence.
|
63
|
+
|
64
|
+
`chain.make_sentence_of_length(280)`
|
65
|
+
|
66
|
+
Returns a sentence that is no longer than 280 characters, or nil if the model is unable to generate a unique sentence.
|
67
|
+
|
26
68
|
|
27
69
|
## Development
|
28
70
|
|
data/lib/markovite.rb
CHANGED
@@ -36,9 +36,7 @@ module Markovite
|
|
36
36
|
|
37
37
|
####Future Self: Make this a module####
|
38
38
|
|
39
|
-
|
40
|
-
str.split(" ")
|
41
|
-
end
|
39
|
+
|
42
40
|
|
43
41
|
def make_sentence
|
44
42
|
chainer.make_sentence
|
@@ -60,6 +58,10 @@ module Markovite
|
|
60
58
|
|
61
59
|
private
|
62
60
|
|
61
|
+
def split_words(str)
|
62
|
+
str.split(" ")
|
63
|
+
end
|
64
|
+
|
63
65
|
def new_from_text(text, dict_depth)
|
64
66
|
#look into refactoring this
|
65
67
|
@depth = dict_depth
|
data/lib/markovite/chainer.rb
CHANGED
@@ -17,18 +17,26 @@ class Chainer
|
|
17
17
|
|
18
18
|
def make_sentence_starts_with(phrase)
|
19
19
|
chunk = get_chunk(phrase)
|
20
|
-
|
20
|
+
begin
|
21
|
+
partial = generate_text(chunk)
|
22
|
+
rescue ArgumentError
|
23
|
+
return nil
|
24
|
+
end
|
21
25
|
"#{phrase} #{partial}"
|
22
26
|
end
|
23
27
|
|
24
28
|
def make_sentence_of_length(how_long)
|
25
|
-
|
29
|
+
begin
|
30
|
+
make_sentence_with_block {|sentence| sentence.length <= how_long}
|
31
|
+
rescue NoMethodError
|
32
|
+
return nil
|
33
|
+
end
|
26
34
|
end
|
27
35
|
|
28
36
|
def make_sentences(amount, condition=true)
|
29
37
|
sentences = []
|
30
38
|
amount.times do
|
31
|
-
sentences << make_sentence
|
39
|
+
sentences << make_sentence
|
32
40
|
end
|
33
41
|
sentences.join(' ')
|
34
42
|
end
|
@@ -76,7 +84,7 @@ class Chainer
|
|
76
84
|
|
77
85
|
def pick_next(words)
|
78
86
|
word_list = dictionary.chain[words]
|
79
|
-
raise "No matching state
|
87
|
+
raise ArgumentError, "No matching state" if word_list.empty?
|
80
88
|
word_list.sample
|
81
89
|
end
|
82
90
|
|
data/lib/markovite/dict.rb
CHANGED
@@ -29,8 +29,8 @@ class Dictionary
|
|
29
29
|
end
|
30
30
|
|
31
31
|
def expand_chain(text)
|
32
|
-
new_sentences = sentence_split.
|
33
|
-
sentences
|
32
|
+
new_sentences = sentence_split.split_text(text)
|
33
|
+
self.sentences += sentence_split.sentences
|
34
34
|
construct_chain(new_sentences)
|
35
35
|
end
|
36
36
|
|
data/lib/markovite/splitter.rb
CHANGED
@@ -3,7 +3,19 @@
|
|
3
3
|
|
4
4
|
class SplitSentence
|
5
5
|
|
6
|
-
|
6
|
+
ENDERS = ['?', '.', '!']
|
7
|
+
ABBREVIATIONS = [
|
8
|
+
'ave.','blvd.','ln','rd.','st.', #directional
|
9
|
+
'tsp.','t.', 'tbs.', 'tbsp.','gal.','lb.','pt.','qt.', #cooking
|
10
|
+
"ak.", "al.", "ar.", "az.", "ca.", "co.", "ct.", "dc.", "de.", "fl.",
|
11
|
+
"ga.", "gu.", "hi.", "ia.", "id.", "il.", "in.", "ks.", "ky.", "la.",
|
12
|
+
"ma.", "md.", "me.", "mh.", "mi.", "mn.", "mo.", "ms.", "mt.", "nc.",
|
13
|
+
"nd.", "ne.", "nh.", "nj.", "nm.", "nv.", "ny.", "oh.", "ok.", "or.",
|
14
|
+
"pa.", "pr.", "pw.", "ri.", "sc.", "sd.", "tn.", "tx.", "ut.", "va.",
|
15
|
+
"vi.", "vt.", "wa.", "wi.", "wv.", "wy.", "u.s.", "u.s.a,", #us locations
|
16
|
+
"dr.", "esq.", "jr.", "mr.", "mrs.", "ms.", "mx.",
|
17
|
+
"prof.", "rev.", "rt. hon.", "sr.", "st." #personal
|
18
|
+
]
|
7
19
|
|
8
20
|
#look into detecting abbreviations!
|
9
21
|
|
@@ -28,42 +40,55 @@ class SplitSentence
|
|
28
40
|
# This way, we can impose grammatical rules by making the first word of the sentence
|
29
41
|
# capitalized, and the end of the sentence will end with some sort of punctuation.
|
30
42
|
|
31
|
-
|
32
|
-
|
33
43
|
def split_text(new_text = nil)
|
34
|
-
|
35
|
-
|
44
|
+
clear_sentences
|
45
|
+
current_sentence = []
|
36
46
|
new_text = new_text || corpus
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
current_sentence
|
47
|
+
all_words = split_words(new_text)
|
48
|
+
all_words.each do |word|
|
49
|
+
if is_end_of_sentence?(word)
|
50
|
+
current_sentence = add_sentence(current_sentence, word)
|
51
|
+
elsif has_newline?(word)
|
52
|
+
newline_words = split_newline(word)
|
53
|
+
current_sentence = add_sentence(current_sentence, newline_words[0])
|
54
|
+
current_sentence << newline_words[1]
|
45
55
|
else
|
46
|
-
|
47
|
-
current_sentence << char
|
56
|
+
current_sentence << word
|
48
57
|
end
|
49
58
|
end
|
50
|
-
|
59
|
+
add_sentence(current_sentence, nil) if !current_sentence.empty?
|
60
|
+
sentences
|
61
|
+
end
|
62
|
+
|
63
|
+
private
|
64
|
+
|
65
|
+
def add_sentence(sentence, word)
|
66
|
+
sentence << word if word
|
67
|
+
sentences << sentence.compact.join(" ")
|
68
|
+
[]
|
51
69
|
end
|
52
70
|
|
53
|
-
def
|
54
|
-
|
55
|
-
split_text(new_text)
|
71
|
+
def split_words(text)
|
72
|
+
text.split(/ /)
|
56
73
|
end
|
57
74
|
|
58
|
-
|
75
|
+
def is_abbreviation(word)
|
76
|
+
ABBREVIATIONS.include?(word.downcase)
|
77
|
+
end
|
59
78
|
|
60
|
-
def
|
79
|
+
def has_newline?(word)
|
80
|
+
word.include?("\n")
|
61
81
|
end
|
62
82
|
|
63
|
-
def
|
83
|
+
def split_newline(word)
|
84
|
+
word.split("\n").map{|str| str.empty? ? nil:str}
|
64
85
|
end
|
65
86
|
|
66
|
-
def
|
87
|
+
def is_end_of_sentence?(word)
|
88
|
+
#check punctuation before delving into abbreviations to save time
|
89
|
+
return false if !ENDERS.include?(word[-1])
|
90
|
+
return false if is_abbreviation(word)
|
91
|
+
return true
|
67
92
|
end
|
68
93
|
|
69
94
|
end
|
data/lib/markovite/version.rb
CHANGED
data/markovite.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["superbiscuit@gmail.com"]
|
11
11
|
|
12
12
|
spec.summary = "A markov chain generator that is simple to use and easy to hack."
|
13
|
-
spec.description = "Doctors hate this one weird trick that generates really good looking gibberish!"
|
13
|
+
spec.description = "Doctors hate this one weird trick that generates really good looking gibberish! \n Markovite is simple, but powerful markov chain generator that is designed to be hackable to your heart's delight"
|
14
14
|
spec.homepage = "https://github.com/f3mshep/ruby_markovify"
|
15
15
|
spec.license = "MIT"
|
16
16
|
|
@@ -32,4 +32,5 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_development_dependency "bundler", "~> 1.16"
|
33
33
|
spec.add_development_dependency "rake", "~> 10.0"
|
34
34
|
spec.add_development_dependency "rspec", "~> 3.0"
|
35
|
+
spec.add_development_dependency "pry"
|
35
36
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: markovite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Alexandra Wright
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,8 +52,23 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '3.0'
|
55
|
-
|
56
|
-
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: pry
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: "Doctors hate this one weird trick that generates really good looking
|
70
|
+
gibberish! \n Markovite is simple, but powerful markov chain generator that is designed
|
71
|
+
to be hackable to your heart's delight"
|
57
72
|
email:
|
58
73
|
- superbiscuit@gmail.com
|
59
74
|
executables: []
|
@@ -77,8 +92,6 @@ files:
|
|
77
92
|
- lib/markovite/splitter.rb
|
78
93
|
- lib/markovite/version.rb
|
79
94
|
- markovite.gemspec
|
80
|
-
- pkg/markovite-0.1.0.gem
|
81
|
-
- pkg/markovite-0.2.0.gem
|
82
95
|
homepage: https://github.com/f3mshep/ruby_markovify
|
83
96
|
licenses:
|
84
97
|
- MIT
|
@@ -101,7 +114,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
101
114
|
version: '0'
|
102
115
|
requirements: []
|
103
116
|
rubyforge_project:
|
104
|
-
rubygems_version: 2.7.
|
117
|
+
rubygems_version: 2.7.6
|
105
118
|
signing_key:
|
106
119
|
specification_version: 4
|
107
120
|
summary: A markov chain generator that is simple to use and easy to hack.
|
data/pkg/markovite-0.1.0.gem
DELETED
Binary file
|
data/pkg/markovite-0.2.0.gem
DELETED
Binary file
|