markov-ruby 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e7dffa91c3e5e4a258f78dceda0418d5b047cb0e
4
+ data.tar.gz: e30b7d34af228f58425c9ab616b1a98f6129f0a7
5
+ SHA512:
6
+ metadata.gz: bfad5c546e6b1dd84b0d1c09eb68e8bb3a341393d8a514e256e846f2793877e6c6be3671433812452362b898bb1364353a2a08e202d46a70edcf556c774161c1
7
+ data.tar.gz: 987c317d1dedcd637fbfb1f429c854cb6769fd6aed49c093732ff64c00539c0fe3e5025d0594ddfb771799d82b7176de33378d5a9a7cf169c2cad83e99a3513e
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console for experimenting with the Markov classes.
# Puts the gem's lib directory on the load path, builds a DB and a Generate
# instance against the test database, then starts IRB.

lib = File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require "markov"
require "irb"

# Resolved relative to this file (the same way `lib` is) so the console can be
# started from any working directory, not only from bin/.
@source = File.expand_path("../../test/fixtures/text_sample.txt", __FILE__)
@dbname = "markov_test"
@db = Markov::DB.new(dbname: @dbname, chunk: 4)
# Reuse @dbname so the generator and the DB helper always target the same
# database.
@g = Markov::Generate.new(@dbname)

IRB.start
@@ -0,0 +1,15 @@
1
# Entry point for the markov-ruby gem: loads the standard-library and
# third-party dependencies first, then the gem's own files.

require 'fileutils'
require 'tmpdir'
require 'json'
require 'csv'
require 'cgi'

require 'engtagger'
require 'pg'

# byebug is only a debugging aid; tolerate installations where it is not
# available instead of failing to load the whole gem.
begin
  require 'byebug'
rescue LoadError
  nil
end

require 'markov/version'
require 'markov/parser'
require 'markov/db'
require 'markov/generate'

# Top-level namespace for the gem's classes.
module Markov
end
@@ -0,0 +1,92 @@
1
module Markov
  # PostgreSQL access layer for the generator's word groups.
  #
  # Builds a CSV of prefix/suffix word groups from a source text, bulk loads it
  # into the +word_groups+ table and answers suffix lookups against that table.
  class DB

    # Placeholder written when a token carries no part-of-speech tag.
    EMPTY_FIELD = "\"\"".freeze

    # @param options [Hash] connection settings and chunk size
    # @option options [String]  :host     defaults to "localhost"
    # @option options [String]  :user     defaults to "postgres"
    # @option options [Integer] :port     defaults to 5432
    # @option options [String]  :dbname   defaults to ""
    # @option options [String]  :password defaults to ""
    # @option options [Integer] :chunk    words per group, defaults to 4
    def initialize(options = {})
      @host     = options[:host] || "localhost"
      @user     = options[:user] || "postgres"
      @port     = options[:port] || 5432
      @dbname   = options[:dbname] || ""
      @password = options[:password] || ""
      @chunk    = options[:chunk] || 4
    end

    # @return [Hash] the settings passed to PG.connect
    def config
      {
        host: @host,
        port: @port,
        user: @user,
        dbname: @dbname,
        password: @password
      }
    end

    # Parses +source+ into word groups of the configured chunk size.
    #
    # The result is cached, but only for the same source and options; asking
    # for a different source re-parses instead of returning stale groups.
    #
    # @param source  [String] path handed to Parser
    # @param options [Hash]   forwarded to Parser#groups
    # @return [Array<Hash>] hashes with :prefix and :suffix
    def word_groups(source, options = {})
      key = [source, options]
      return @word_groups if @word_groups && @word_groups_key == key

      @word_groups_key = key
      @word_groups = Parser.new(source).groups(@chunk, options)
    end

    # Converts "word/TAG" tokens into "escaped_word,TAG" fields.
    #
    # The word is CGI-escaped so it cannot contain a comma; a missing tag
    # becomes the empty-field placeholder. Several tokens are joined with
    # commas so the result stays a well-formed field list.
    #
    # @param input [String, Array<String>] one token or a list of tokens
    # @return [String]
    def split(input)
      [input].flatten.map { |token|
        word, tag = token.split('/')
        # word is nil when the token is empty or consists only of slashes
        [CGI.escape(word.to_s), tag || EMPTY_FIELD].join(",")
      }.join(",")
    end

    # Writes the tagged word groups of +source+ to a CSV file called +name+
    # inside a fresh temporary directory. Each row is: name, the prefix as a
    # "{...}" list, and the suffix.
    #
    # @return [String] path of the written file; the caller must remove its
    #   directory
    def tmp_csv(name, source)
      @dir = Dir.mktmpdir
      @path = [@dir, name].join("/")
      CSV.open(@path, "wb") do |csv|
        word_groups(source, { tagged: true }).each do |group|
          prefix = group[:prefix].map { |word| split(word) }.join(",")
          csv << [name, "{#{prefix}}", split(group[:suffix])]
        end
      end
      @path
    end

    # Builds the CSV for +source+ and loads it into word_groups with COPY.
    # The temporary directory is removed and the word-group cache cleared
    # whether or not the import succeeds.
    #
    # NOTE(review): COPY ... FROM '<file>' names a file path inside the SQL
    # statement; presumably the database server must be able to read the
    # temporary directory for this to work - confirm for non-local setups.
    def import_csv(name, source)
      # Reset first so a failure inside tmp_csv never cleans up (or trips over)
      # a directory left from an earlier call.
      @dir = nil
      @csv = tmp_csv(name, source)
      # The path is embedded in the statement, so double any single quotes.
      @query = "COPY word_groups FROM '#{@csv.gsub("'", "''")}' DELIMITER ',' CSV"
      connection(@query)
    ensure
      FileUtils.remove_entry(@dir) if @dir && File.exist?(@dir)
      @word_groups = nil
    end

    # Counts the suffixes recorded after +word+ for the given +source+.
    #
    # Both values are sent as bind parameters rather than interpolated into
    # the SQL text.
    #
    # @param word   [String] escaped word to match against the prefix
    # @param source [String] value of the source column
    # @return [Array<Array<String>>] rows of [suffix, count]
    def lookup(word, source)
      @query = <<-SQL
        SELECT suffix, count(*) AS count
        FROM word_groups
        WHERE prefix[#{last_prefix_index}] = $1
        AND source = $2
        GROUP BY suffix
      SQL
      connection(@query, [word.to_s, source.to_s]).values
    end

    # @return [Array<String>] distinct source values in word_groups
    def csv_sources
      @query = "SELECT DISTINCT source
                FROM word_groups"
      connection(@query).values.flatten
    end

    # @return [Array<String>] distinct 'source' values in word_groups_jsonb
    def json_sources
      @query = "SELECT DISTINCT word_groups->'source'
                FROM word_groups_jsonb"
      connection(@query).values.flatten
    end

    private

    # Position of the last prefix word inside the stored prefix list.
    #
    # tmp_csv stores every prefix word as a word,tag pair, so a chunk of N
    # words yields 2 * (N - 1) elements and the last word sits one before the
    # end (5 for the default chunk of 4).
    def last_prefix_index
      2 * (Integer(@chunk) - 1) - 1
    end

    # Opens a connection, optionally runs +query+ and always closes the
    # connection again.
    #
    # @param query  [String, nil] SQL to run; nil only opens and closes
    # @param params [Array] bind parameters for $1, $2, ...
    # @return the query result, or nil when no query was given
    def connection(query = nil, params = [])
      conn = PG.connect(config)
      return unless query

      params.empty? ? conn.exec(query) : conn.exec_params(query, params)
    ensure
      # conn is nil when PG.connect itself raised; closing it unguarded would
      # replace the real error with a NoMethodError.
      conn.close if conn
    end

  end
end
@@ -0,0 +1,43 @@
1
module Markov
  # Generates text by repeatedly looking up, in the database, the suffixes
  # recorded after the current word and picking one at random, weighted by how
  # often it was seen.
  class Generate

    attr_reader :length, :weight, :start

    # @param dbname  [String] database name handed to DB
    # @param options [Hash]
    # @option options [Integer] :length number of lookups per text (default 200)
    # @option options [Numeric] :weight exponent applied to suffix counts
    #   (default 1.2)
    # @option options [String]  :start  first word (default "The")
    # @option options [String]  :table  source value passed to DB#lookup
    # @option options [Integer] :chunk  chunk size for DB (default 4)
    def initialize(dbname, options = {})
      @length = options[:length] || 200
      @weight = options[:weight] || 1.2
      @start = options[:start] || "The"
      @table = options[:table]
      @db = DB.new(dbname: dbname, chunk: options[:chunk] || 4)
    end

    # @return [String] the most recently chosen "word,tag" token, or the start
    #   word when nothing has been chosen yet
    def current_word
      @word || @start
    end

    # Picks the token that follows the current word and makes it current.
    #
    # @return [String, nil] the new token, or nil when the current word has no
    #   recorded suffix (the current word is then left unchanged)
    def next_word
      candidates = lookup(CGI.escape(bare_word(current_word)), @table)
      index = candidates[:probability].sample
      return nil if index.nil?

      @word = CGI.unescape(candidates[:suffices][index])
    end

    # Fetches the suffix rows for +word+ and prepares them for sampling.
    #
    # @return [Hash] :suffices is the list of suffix tokens; :probability
    #   repeats each suffix's index (count ** weight).round times, so sampling
    #   from it favours frequent suffixes
    def lookup(word, table)
      @lookup = @db.lookup(word, table)
      {
        suffices: @lookup.map { |row| row[0] },
        probability: @lookup.each_with_index.flat_map { |row, index|
          [index] * (row[1].to_i ** @weight).round
        }
      }
    end

    # Builds a text beginning with the start word.
    #
    # Every call starts over from the start word. Generation stops early when
    # a word has no recorded suffix. Afterwards, whitespace in front of
    # punctuation is removed.
    #
    # NOTE(review): the loop runs @length + 1 times, so a full text holds the
    # start word plus @length + 1 generated words; kept as-is.
    #
    # @return [String]
    def text
      @word = nil
      words = [@start]
      (0..@length).each do
        break unless next_word
        words << bare_word(current_word)
      end
      words.join(" ").strip.squeeze(" ").gsub(/\s([.,:;`'?!])/, "\\1")
    end

    private

    # Extracts the word from a "word,tag" token.
    #
    # A token starting with a comma stands for the comma itself. Otherwise the
    # tag is whatever follows the last comma, so words that contain commas
    # themselves (e.g. "1,000") stay intact. A token without a comma, such as
    # the start word, is returned unchanged.
    def bare_word(token)
      return "," if token.start_with?(",")

      head, separator, _tag = token.rpartition(",")
      separator.empty? ? token : head
    end

  end
end
@@ -0,0 +1,33 @@
1
module Markov
  # Reads a source text file and slices it into overlapping word groups
  # (a prefix of words followed by the word that comes next).
  class Parser

    # Placeholder used to pad the beginning of the text.
    PADDING = "\"\"".freeze

    attr_reader :source

    # @param source  [String] path of the text file to parse
    # @param options [Hash]   accepted for compatibility; not used
    def initialize(source, options = {})
      @source = source
    end

    # @return [String] the file contents with invalid byte sequences replaced
    def raw_text
      @raw_text ||= File.read(@source).scrub
    end

    # @return [String] the text as returned by EngTagger#get_readable
    def tagged_text
      # Build the tagger only when the result is not cached yet.
      @tagged_text ||= EngTagger.new.get_readable(raw_text)
    end

    # Splits the text into groups of +chunk_size+ consecutive words.
    #
    # The word list is padded in front with chunk_size - 1 placeholders so the
    # very first word of the text also appears as a suffix.
    #
    # @param chunk_size [Integer] words per group (prefix words + 1 suffix)
    # @param options    [Hash] :tagged selects the tagged text instead of the
    #   raw text
    # @return [Array<Hash>] hashes with :prefix (Array<String>) and :suffix
    #   (String)
    def groups(chunk_size, options = {})
      text = options[:tagged] ? tagged_text : raw_text
      padding = Array.new([chunk_size - 1, 0].max) { PADDING.dup }
      # to_s guards against a nil text - NOTE(review): confirm whether the
      # tagger can return nil for empty input.
      words = padding + text.to_s.split

      words.each_cons(chunk_size).map do |window|
        # Everything but the last word is the prefix; for a chunk size of 1
        # that is an empty prefix rather than the suffix itself.
        { prefix: window[0...-1], suffix: window.last }
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module Markov
  # Release version of the gem; frozen so it cannot be modified in place.
  VERSION = '0.0.1'.freeze
end
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markov-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - rob allen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: engtagger
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.18.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.18.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.14.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 2.4.5
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 2.4.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: byebug
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 9.0.5
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 9.0.5
97
+ description: A postgresql backed markov text generator.
98
+ email: rob.all3n@gmail.com
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - bin/console
104
+ - lib/markov.rb
105
+ - lib/markov/db.rb
106
+ - lib/markov/generate.rb
107
+ - lib/markov/parser.rb
108
+ - lib/markov/version.rb
109
+ homepage: ''
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.5.1
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: A postgresql backed markov text generator.
133
+ test_files: []