markov-ruby 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/console +14 -0
- data/lib/markov.rb +15 -0
- data/lib/markov/db.rb +92 -0
- data/lib/markov/generate.rb +43 -0
- data/lib/markov/parser.rb +33 -0
- data/lib/markov/version.rb +3 -0
- metadata +133 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e7dffa91c3e5e4a258f78dceda0418d5b047cb0e
|
4
|
+
data.tar.gz: e30b7d34af228f58425c9ab616b1a98f6129f0a7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: bfad5c546e6b1dd84b0d1c09eb68e8bb3a341393d8a514e256e846f2793877e6c6be3671433812452362b898bb1364353a2a08e202d46a70edcf556c774161c1
|
7
|
+
data.tar.gz: 987c317d1dedcd637fbfb1f429c854cb6769fd6aed49c093732ff64c00539c0fe3e5025d0594ddfb771799d82b7176de33378d5a9a7cf169c2cad83e99a3513e
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Development console: puts the gem's lib directory on the load path, builds
# a DB handle and a text generator for the test database, then starts IRB.

lib = File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require "markov"
require "irb"

# Resolved against this script's location so the fixture path does not depend
# on the directory the console is launched from.
@source = File.expand_path("../../test/fixtures/text_sample.txt", __FILE__)
@dbname = "markov_test"
@db = Markov::DB.new(dbname: @dbname, chunk: 4)
# Reuse @dbname so the generator and the DB handle cannot drift apart.
@g = Markov::Generate.new(@dbname)

IRB.start
|
data/lib/markov.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'markov/generate'
require 'markov/version'
require 'markov/parser'
require 'markov/db'
require 'fileutils'
require 'engtagger'
require 'tmpdir'
# byebug is only a debugging aid; load it when it is installed, but do not
# make `require 'markov'` fail with LoadError when it is absent.
begin
  require 'byebug'
rescue LoadError
  nil
end
require 'json'
require 'csv'
require 'cgi'
require 'pg'

# Top-level namespace for the gem; the classes live in the files required above.
module Markov
end
|
data/lib/markov/db.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
module Markov
  # PostgreSQL access for the Markov word groups.
  #
  # Connection settings are captured at construction time. Every query opens
  # a connection with PG.connect, runs, and closes the connection again.
  class DB

    # @param options [Hash] settings; every key is optional
    #   :host     - database host (default "localhost")
    #   :user     - database user (default "postgres")
    #   :port     - database port (default 5432)
    #   :dbname   - database name (default "")
    #   :password - password (default "")
    #   :chunk    - words per group, prefix plus suffix (default 4)
    def initialize(options = {})
      @host = options[:host] || "localhost"
      @user = options[:user] || "postgres"
      @port = options[:port] || 5432
      @dbname = options[:dbname] || ""
      @password = options[:password] || ""
      @chunk = options[:chunk] || 4
    end

    # @return [Hash] the connection settings handed to PG.connect
    def config
      {
        host: @host,
        port: @port,
        user: @user,
        dbname: @dbname,
        password: @password
      }
    end

    # Parses +source+ into prefix/suffix groups of @chunk words.
    #
    # The result is cached, but only for the same source and options; a call
    # with different arguments re-parses instead of returning stale groups.
    #
    # @param source [String] path passed to Parser.new
    # @param options [Hash] forwarded to Parser#groups
    # @return [Array<Hash>] whatever Parser#groups returns
    def word_groups(source, options = {})
      key = [source, options.dup]
      unless @word_groups && @word_groups_key == key
        # Assign the groups first so a parser failure leaves the cache untouched.
        @word_groups = Parser.new(source).groups(@chunk, options)
        @word_groups_key = key
      end
      @word_groups
    end

    # Converts "word/TAG" tokens into "escaped_word,TAG" strings.
    #
    # @param input [String, Array<String>] one token or a list of tokens
    # @return [String] the converted tokens concatenated without a separator
    def split(input)
      [input].flatten.map { |token| escape_token(token) }.join("")
    end

    # Writes the tagged word groups of +source+ to a CSV file in a fresh
    # temporary directory. Each row is: name, prefix array literal, suffix.
    #
    # @param name [String] source label, also used as the file name
    # @param source [String] path of the text to parse
    # @return [String] path of the CSV file that was written
    def tmp_csv(name, source)
      @dir = Dir.mktmpdir
      @path = [@dir, name].join("/")
      CSV.open(@path, "wb") do |csv|
        word_groups(source, { tagged: true }).each do |group|
          prefix = group[:prefix].map { |word| split(word) }.join(",")
          csv << [name, "{#{prefix}}", split(group[:suffix])]
        end
      end
      @path
    end

    # Generates the CSV for +source+ and loads it into the word_groups table
    # with a COPY statement. The temporary directory is removed afterwards,
    # including when CSV generation or the COPY fails.
    #
    # @param name [String] source label stored with every row
    # @param source [String] path of the text to import
    # @return the result of the COPY statement
    def import_csv(name, source)
      # Forget any directory left over from an earlier call so the cleanup
      # below only ever touches the directory created by this call.
      @dir = nil
      @csv = tmp_csv(name, source)
      # Double single quotes so the path stays one SQL string literal.
      quoted_path = @csv.gsub("'", "''")
      @query = "COPY word_groups FROM '#{quoted_path}' DELIMITER ',' CSV"
      connection(@query)
    ensure
      FileUtils.remove_entry(@dir) if @dir && File.directory?(@dir)
      @word_groups = nil
    end

    # Counts the suffixes recorded after +word+ for the given +source+.
    #
    # The word and source are sent as bind parameters rather than being
    # interpolated into the SQL text.
    #
    # @param word [String] escaped word, compared with the last prefix word
    # @param source [String] source label to restrict the search to
    # @return [Array<Array>] rows of [suffix, count]
    def lookup(word, source)
      @query = "SELECT suffix, count(*) AS count
                FROM word_groups
                WHERE prefix[#{last_prefix_index}] = $1
                AND source = $2
                GROUP BY suffix"
      # to_s keeps nil behaving like the empty string, as interpolation did.
      connection(@query, [word.to_s, source.to_s]).values
    end

    # @return [Array] the distinct source values in word_groups
    def csv_sources
      @query = "SELECT DISTINCT source
                FROM word_groups"
      connection(@query).values.flatten
    end

    # @return [Array] the distinct 'source' values in word_groups_jsonb
    def json_sources
      @query = "SELECT DISTINCT word_groups->'source'
                FROM word_groups_jsonb"
      connection(@query).values.flatten
    end

    private

    # Opens a connection, runs +query+ when one is given, and always closes
    # the connection. When connecting itself fails there is nothing to
    # close, so the original connection error is the one that propagates.
    #
    # @param query [String, nil] SQL to execute
    # @param params [Array] bind parameters for $1, $2, ... placeholders
    # @return the query result, or nil when no query was given
    def connection(query = nil, params = [])
      conn = PG.connect(config)
      return nil unless query
      params.empty? ? conn.exec(query) : conn.exec_params(query, params)
    ensure
      conn.close if conn
    end

    # Splits a token on its last "/" so a word that itself contains slashes
    # keeps them; a token with no tag gets the "" placeholder tag.
    def escape_token(token)
      word, separator, tag = token.rpartition("/")
      word, tag = token, nil if separator.empty?
      tag = "\"\"" if tag.nil? || tag.empty?
      "#{CGI.escape(word)},#{tag}"
    end

    # 1-based array position of the last prefix word. tmp_csv stores each
    # prefix word as a word,tag pair, so a chunk of N words yields N - 1
    # pairs and the last word sits at 2 * (N - 1) - 1 (5 for the default 4).
    # Integer() guarantees only a number is interpolated into the SQL.
    def last_prefix_index
      (2 * (Integer(@chunk) - 1)) - 1
    end

  end
end
|
data/lib/markov/generate.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Markov
  # Generates text by repeatedly looking up, in the database, the suffixes
  # recorded after the current word and picking one at random, weighted by
  # how often each suffix occurred.
  class Generate

    # Matches whitespace followed by a punctuation mark so the whitespace
    # can be dropped from the generated text.
    PUNCTUATION_SPACING = /\s(\.|\,|:|;|`|'|\?|!)/

    attr_reader :length, :weight, :start

    # @param dbname [String] database name handed to DB.new
    # @param options [Hash] settings; every key is optional
    #   :length - upper bound used by #text (default 200)
    #   :weight - exponent applied to suffix counts (default 1.2)
    #   :start  - first word of the generated text (default "The")
    #   :table  - source label passed to DB#lookup (default nil)
    #   :chunk  - chunk size handed to DB.new (default 4)
    def initialize(dbname, options = {})
      @length = options[:length] || 200
      @weight = options[:weight] || 1.2
      @start = options[:start] || "The"
      @table = options[:table]
      @db = DB.new(dbname: dbname, chunk: options[:chunk] || 4)
    end

    # @return [String] the most recently chosen suffix, or the start word
    #   when nothing has been chosen yet
    def current_word
      @current_word = @word || @start
    end

    # Chooses the suffix that follows the current word and makes it current.
    #
    # @return [String, nil] the unescaped suffix, or nil (leaving the current
    #   word unchanged) when the lookup found no suffix for the current word
    def next_word
      candidates = lookup(CGI.escape(bare_word(current_word)), @table)
      index = candidates[:probability].sample
      # sample returns nil for an empty list: the chain has hit a dead end.
      return nil if index.nil?

      @word = CGI.unescape(candidates[:suffices][index])
    end

    # Looks up the suffixes that follow +word+ and prepares a weighted draw.
    #
    # @param word [String] escaped word to look up
    # @param table [String, nil] source label passed to DB#lookup
    # @return [Hash] :suffices is the list of suffix strings; :probability
    #   holds each suffix's index repeated round(count ** weight) times, so
    #   sampling it picks more frequent suffixes more often
    def lookup(word, table)
      @lookup = @db.lookup(word, table)
      {
        suffices: @lookup.map { |row| row[0] },
        probability: @lookup.each_with_index.flat_map { |row, index|
          [index] * (row[1].to_i ** @weight).round
        }
      }
    end

    # Builds a text that begins with the start word and appends up to
    # @length + 1 generated words, stopping early at a dead end. Every call
    # starts a fresh chain from the start word.
    #
    # @return [String] the words joined by single spaces, with the space
    #   before punctuation marks removed
    def text
      @word = nil
      words = [@start]
      (0..@length).each do
        break unless next_word
        words << bare_word(current_word)
      end
      words.join(" ").strip.squeeze(" ").gsub(PUNCTUATION_SPACING, "\\1")
    end

    private

    # Returns the text before the first comma of +entry+. An entry that
    # begins with a comma yields "," itself, since splitting it on commas
    # would produce an empty first field.
    def bare_word(entry)
      entry.match(/^,/) ? "," : entry.split(",")[0]
    end

  end
end
|
data/lib/markov/parser.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module Markov
  # Reads a text file and slices its words into prefix/suffix groups.
  class Parser

    attr_reader :source

    # @param source [String] path of the text file to read
    # @param options [Hash] accepted for interface compatibility; unused
    def initialize(source, options = {})
      @source = source
    end

    # @return [String] the file contents with invalid byte sequences
    #   replaced; read once and cached
    def raw_text
      @raw_text ||= File.read(@source).scrub
    end

    # @return the raw text run through EngTagger#get_readable; cached. The
    #   tagger is only built when the cache is empty.
    def tagged_text
      @tagged_text ||= EngTagger.new.get_readable(raw_text)
    end

    # Slides a window of +chunk_size+ words over the text. The word list is
    # padded at the front with chunk_size - 1 placeholder entries ("") so
    # the first real word already appears as a suffix.
    #
    # @param chunk_size [Integer] words per group, prefix plus suffix
    # @param options [Hash] :tagged selects the tagged text over the raw text
    # @return [Array<Hash>] one { prefix: [...], suffix: word } per window;
    #   the prefix is every word of the window except the last
    def groups(chunk_size, options = {})
      text = options[:tagged] ? tagged_text : raw_text
      # NOTE(review): to_s guards against the tagger returning nil for text
      # it rejects (e.g. empty input) -- confirm against engtagger.
      words = Array.new(chunk_size - 1) { "\"\"" } + text.to_s.split

      words.each_cons(chunk_size).map do |window|
        { prefix: window[0...-1], suffix: window.last }
      end
    end

  end
end
|
metadata
ADDED
@@ -0,0 +1,133 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: markov-ruby
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- rob allen
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-07-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: engtagger
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.2.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 0.2.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: pg
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.18.4
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.18.4
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: bundler
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '1.7'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '1.7'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: guard
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - '='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 2.14.0
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - '='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 2.14.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: guard-minitest
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - '='
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 2.4.5
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - '='
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 2.4.5
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: byebug
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - '='
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 9.0.5
|
90
|
+
type: :development
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - '='
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 9.0.5
|
97
|
+
description: A postgresql backed markov text generator.
|
98
|
+
email: rob.all3n@gmail.com
|
99
|
+
executables: []
|
100
|
+
extensions: []
|
101
|
+
extra_rdoc_files: []
|
102
|
+
files:
|
103
|
+
- bin/console
|
104
|
+
- lib/markov.rb
|
105
|
+
- lib/markov/db.rb
|
106
|
+
- lib/markov/generate.rb
|
107
|
+
- lib/markov/parser.rb
|
108
|
+
- lib/markov/version.rb
|
109
|
+
homepage: ''
|
110
|
+
licenses:
|
111
|
+
- MIT
|
112
|
+
metadata: {}
|
113
|
+
post_install_message:
|
114
|
+
rdoc_options: []
|
115
|
+
require_paths:
|
116
|
+
- lib
|
117
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
requirements: []
|
128
|
+
rubyforge_project:
|
129
|
+
rubygems_version: 2.5.1
|
130
|
+
signing_key:
|
131
|
+
specification_version: 4
|
132
|
+
summary: A postgresql backed markov text generator.
|
133
|
+
test_files: []
|