markov-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: e7dffa91c3e5e4a258f78dceda0418d5b047cb0e
4
+ data.tar.gz: e30b7d34af228f58425c9ab616b1a98f6129f0a7
5
+ SHA512:
6
+ metadata.gz: bfad5c546e6b1dd84b0d1c09eb68e8bb3a341393d8a514e256e846f2793877e6c6be3671433812452362b898bb1364353a2a08e202d46a70edcf556c774161c1
7
+ data.tar.gz: 987c317d1dedcd637fbfb1f429c854cb6769fd6aed49c093732ff64c00539c0fe3e5025d0594ddfb771799d82b7176de33378d5a9a7cf169c2cad83e99a3513e
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Interactive console for experimenting with the gem: puts lib/ on the load
# path, builds a DB and a Generate object for the "markov_test" database and
# then starts IRB.

lib = File.expand_path("../../lib", __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)

require "markov"
require "irb"

# Resolve the sample text relative to this script instead of the caller's
# working directory, so the path is the same wherever the console is started.
# NOTE(review): the fixture is not listed among the packaged gem files, so it
# is presumably only present in a source checkout - confirm.
@source = File.expand_path("../../test/fixtures/text_sample.txt", __FILE__)
@dbname = "markov_test"
@db = Markov::DB.new(dbname: @dbname, chunk: 4)
# Reuse @dbname so the generator always targets the same database as @db.
@g = Markov::Generate.new(@dbname)

IRB.start
@@ -0,0 +1,15 @@
1
require 'markov/generate'
require 'markov/version'
require 'markov/parser'
require 'markov/db'
require 'fileutils'
require 'engtagger'
require 'tmpdir'

# byebug is declared only as a development dependency of the gem, so it may
# not be installed at runtime. Load it when available and carry on without
# the debugger otherwise, instead of failing the whole require.
begin
  require 'byebug'
rescue LoadError
  nil
end

require 'json'
require 'csv'
require 'cgi'
require 'pg'

# Namespace for the gem; the classes themselves live in the files required
# above (markov/generate, markov/parser, markov/db, markov/version).
module Markov
end
@@ -0,0 +1,92 @@
1
module Markov
  # PostgreSQL-backed store for Markov word groups.
  #
  # Turns a source text into CSV rows of (source, prefix, suffix), bulk-loads
  # them into the +word_groups+ table with COPY, and queries how often each
  # suffix follows a given word. Every query opens its own connection and
  # closes it again afterwards.
  class DB

    # @param options [Hash] settings; every key is optional
    # @option options [String]  :host     database host (default "localhost")
    # @option options [String]  :user     database user (default "postgres")
    # @option options [Integer] :port     database port (default 5432)
    # @option options [String]  :dbname   database name (default "")
    # @option options [String]  :password password (default "")
    # @option options [Integer] :chunk    words per group, prefix words plus
    #   one suffix word (default 4)
    def initialize(options = {})
      @host     = options[:host]     || "localhost"
      @user     = options[:user]     || "postgres"
      @port     = options[:port]     || 5432
      @dbname   = options[:dbname]   || ""
      @password = options[:password] || ""
      @chunk    = options[:chunk]    || 4
    end

    # @return [Hash] the connection settings handed to PG.connect
    def config
      {
        host: @host,
        port: @port,
        user: @user,
        dbname: @dbname,
        password: @password
      }
    end

    # Parses +source+ into word groups of the configured chunk size.
    #
    # The result is cached, but only for the same source and options; asking
    # for a different source re-parses instead of returning stale groups.
    #
    # @param source [String] path of the text file to parse
    # @param options [Hash] passed through to Parser#groups
    # @return [Array<Hash>] hashes with :prefix and :suffix keys
    def word_groups(source, options = {})
      key = [source, options]
      unless @word_groups && @word_groups_key == key
        @word_groups = Parser.new(source).groups(@chunk, options)
        @word_groups_key = key
      end
      @word_groups
    end

    # Converts "word/TAG" tokens into "escaped_word,TAG" column text.
    #
    # The word is CGI-escaped; a token without a tag gets the literal
    # placeholder "" (two double quotes). Several tokens are joined with
    # commas so their fields stay separated.
    #
    # @param input [String, Array<String>] one token or a list of tokens
    # @return [String]
    def split(input)
      [input].flatten.map { |token|
        word, tag = token.split('/')
        [CGI.escape(word), tag || "\"\""].join(",")
      }.join(",")
    end

    # Writes the tagged word groups of +source+ to a CSV file called +name+
    # inside a fresh temporary directory. Each row is
    # [name, "{prefix fields}", suffix fields].
    #
    # @param name [String] source label, also used as the file name
    # @param source [String] path of the text file to parse
    # @return [String] path of the CSV file that was written
    def tmp_csv(name, source)
      @dir = Dir.mktmpdir
      @path = File.join(@dir, name)
      CSV.open(@path, "wb") do |csv|
        word_groups(source, { tagged: true }).each do |group|
          prefix = group[:prefix].map { |word| split(word) }.join(",")
          csv << [name, "{#{prefix}}", split(group[:suffix])]
        end
      end
      @path
    end

    # Builds the CSV for +source+ and loads it into +word_groups+ via COPY.
    #
    # The temporary directory is removed afterwards even when writing the CSV
    # or running the query fails, and the word-group cache is cleared.
    #
    # @param name [String] source label stored with every row
    # @param source [String] path of the text file to import
    # @return [Object] whatever the connection returns for the COPY statement
    def import_csv(name, source)
      # Forget paths left over from an earlier import so the cleanup below
      # never acts on a directory that no longer exists.
      @csv = nil
      @dir = nil
      begin
        @csv = tmp_csv(name, source)
        # COPY cannot take the file name as a bind parameter, so quote it as
        # an SQL string literal by doubling embedded single quotes.
        quoted_path = @csv.gsub("'", "''")
        @query = "COPY word_groups FROM '#{quoted_path}' DELIMITER ',' CSV"
        connection(@query)
      ensure
        FileUtils.remove_entry(@dir) if @dir && File.exist?(@dir)
        @word_groups = nil
      end
    end

    # Counts the suffixes recorded after +word+ for the given source.
    #
    # Values are sent as bind parameters, not interpolated into the SQL.
    # nil is sent as an empty string, which is what interpolation produced.
    #
    # @param word [String] the (CGI-escaped) last word of the prefix
    # @param source [String, nil] source label to restrict the search to
    # @return [Array<Array>] rows of [suffix, count]
    def lookup(word, source)
      @query = "SELECT suffix, count(*) AS count
                FROM word_groups
                WHERE prefix[#{last_prefix_word_index}] = $1
                AND source = $2
                GROUP BY suffix"
      connection(@query, [word.to_s, source.to_s]).values
    end

    # @return [Array] the distinct source labels in +word_groups+
    def csv_sources
      @query = "SELECT DISTINCT source
                FROM word_groups"
      connection(@query).values.flatten
    end

    # @return [Array] the distinct 'source' values in +word_groups_jsonb+
    def json_sources
      @query = "SELECT DISTINCT word_groups->'source'
                FROM word_groups_jsonb"
      connection(@query).values.flatten
    end

    private

    # One-based array position of the last prefix word. Every prefix word
    # contributes two fields (word, tag), so with the default chunk of 4 this
    # is 5. Integer() guarantees only a number is placed into the SQL text.
    def last_prefix_word_index
      (Integer(@chunk) - 1) * 2 - 1
    end

    # Opens a connection, runs +query+ (with +params+ when given) and always
    # closes the connection again.
    #
    # @param query [String, nil] SQL to execute; nothing is run when nil
    # @param params [Array, nil] bind values for $1, $2, ...
    # @return [Object, nil] the query result, or nil without a query
    def connection(query = nil, params = nil)
      conn = PG.connect(config)
      query && (params ? conn.exec_params(query, params) : conn.exec(query))
    ensure
      # conn is nil when PG.connect itself failed; closing unconditionally
      # would replace the real connection error with a NoMethodError.
      conn.close if conn
    end

  end
end
@@ -0,0 +1,43 @@
1
module Markov
  # Generates text by walking the word groups stored in the database: each
  # step looks up the suffixes recorded after the current word and picks one
  # at random, weighted by how often it occurred.
  class Generate

    attr_reader :length, :weight, :start

    # @param dbname [String] name of the database to read from
    # @param options [Hash] settings; every key is optional
    # @option options [Integer] :length controls how many words #text
    #   generates (default 200)
    # @option options [Numeric] :weight exponent applied to suffix counts
    #   when weighting the random choice (default 1.2)
    # @option options [String]  :start  first word of the text (default "The")
    # @option options [String]  :table  passed to DB#lookup as the source to
    #   restrict the search to
    # @option options [Integer] :chunk  chunk size given to the DB (default 4)
    def initialize(dbname, options = {})
      @length = options[:length] || 200
      @weight = options[:weight] || 1.2
      @start  = options[:start]  || "The"
      @table  = options[:table]
      @db     = DB.new(dbname: dbname, chunk: options[:chunk] || 4)
    end

    # @return [String] the most recently chosen suffix, or the start word
    #   when nothing has been chosen yet
    def current_word
      @current_word = @word || @start
    end

    # Advances the chain by one step.
    #
    # @return [String, nil] the newly chosen suffix, or nil when no suffix is
    #   recorded after the current word (the chain has reached a dead end)
    def next_word
      words = lookup(CGI.escape(bare_word(current_word)), @table)
      index = words[:probability].sample
      # An empty lookup has nothing to sample; report the dead end instead of
      # failing while indexing the suffix list with nil.
      return nil if index.nil?
      @word = CGI.unescape(words[:suffices][index])
    end

    # Fetches the suffixes following +word+ and builds the weighted index
    # pool used for sampling.
    #
    # @param word [String] CGI-escaped word to look up
    # @param table [String, nil] source to restrict the search to
    # @return [Hash] :suffices is the list of suffixes; :probability holds
    #   each suffix's index repeated round(count ** weight) times
    def lookup(word, table)
      @lookup = @db.lookup(word, table)
      {
        suffices: @lookup.map { |row| row[0] },
        probability: @lookup.each_with_index.flat_map { |row, index|
          [index] * (row[1].to_i ** @weight).round
        }
      }
    end

    # Builds a text that begins with the start word and then follows the
    # chain for up to length + 1 further words, stopping early at a dead
    # end. Whitespace before punctuation is removed.
    #
    # @return [String]
    def text
      # Begin every text at the start word rather than wherever a previous
      # call happened to stop.
      @word = nil
      words = [@start]
      (0..@length).each do
        break unless next_word
        words << bare_word(current_word)
      end
      words.map { |word| "#{word} " }.join
           .strip.squeeze(" ").gsub(/\s(\.|\,|:|;|`|'|\?|!)/,"\\1")
    end

    private

    # Strips the tag from a "word,TAG" entry. An entry starting with a comma
    # is the comma token itself, so the bare word is ",".
    def bare_word(entry)
      entry.match(/^,/) ? "," : entry.split(",")[0]
    end

  end
end
@@ -0,0 +1,33 @@
1
module Markov
  # Reads a text file and slices it into overlapping word groups (a list of
  # prefix words followed by one suffix word).
  class Parser

    attr_reader :source

    # @param source [String] path of the text file to read
    # @param options [Hash] accepted for interface compatibility; not used
    def initialize(source, options = {})
      @source = source
    end

    # @return [String] the file contents with invalid byte sequences
    #   replaced; read once and cached
    def raw_text
      @raw_text ||= File.read(@source).scrub
    end

    # @return [String, nil] the text run through EngTagger#get_readable;
    #   cached once a value has been produced.
    #   NOTE(review): get_readable appears to return nil for text it
    #   considers invalid (e.g. empty) - confirm against the engtagger docs.
    def tagged_text
      # Build the tagger only when there is no cached result yet.
      @tagged_text ||= EngTagger.new.get_readable(raw_text)
    end

    # Splits the text on whitespace and returns every run of +chunk_size+
    # consecutive words as a prefix/suffix pair. The word list is padded at
    # the front with chunk_size - 1 placeholder entries (the literal "")
    # so that the first real word also appears as a suffix.
    #
    # @param chunk_size [Integer] words per group, including the suffix
    # @param options [Hash] set :tagged to use the tagged text
    # @return [Array<Hash>] hashes with :prefix (Array) and :suffix (String)
    def groups(chunk_size, options = {})
      # to_s covers a tagger that produced nothing for the text.
      text = (options[:tagged] ? tagged_text : raw_text).to_s
      padding = Array.new([chunk_size - 1, 0].max) { "\"\"" }

      (padding + text.split).each_cons(chunk_size).map do |chunk|
        # Everything but the last word is the prefix; for a chunk size of 1
        # that is an empty list rather than the suffix itself.
        { prefix: chunk[0...-1], suffix: chunk.last }
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
module Markov
  # Release number of the gem; frozen so it cannot be modified in place.
  VERSION = "0.0.1".freeze
end
metadata ADDED
@@ -0,0 +1,133 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: markov-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - rob allen
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-07-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: engtagger
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.2.0
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.2.0
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 0.18.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 0.18.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.7'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.7'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 2.14.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 2.14.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-minitest
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '='
74
+ - !ruby/object:Gem::Version
75
+ version: 2.4.5
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '='
81
+ - !ruby/object:Gem::Version
82
+ version: 2.4.5
83
+ - !ruby/object:Gem::Dependency
84
+ name: byebug
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '='
88
+ - !ruby/object:Gem::Version
89
+ version: 9.0.5
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '='
95
+ - !ruby/object:Gem::Version
96
+ version: 9.0.5
97
+ description: A postgresql backed markov text generator.
98
+ email: rob.all3n@gmail.com
99
+ executables: []
100
+ extensions: []
101
+ extra_rdoc_files: []
102
+ files:
103
+ - bin/console
104
+ - lib/markov.rb
105
+ - lib/markov/db.rb
106
+ - lib/markov/generate.rb
107
+ - lib/markov/parser.rb
108
+ - lib/markov/version.rb
109
+ homepage: ''
110
+ licenses:
111
+ - MIT
112
+ metadata: {}
113
+ post_install_message:
114
+ rdoc_options: []
115
+ require_paths:
116
+ - lib
117
+ required_ruby_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ required_rubygems_version: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ requirements: []
128
+ rubyforge_project:
129
+ rubygems_version: 2.5.1
130
+ signing_key:
131
+ specification_version: 4
132
+ summary: A postgresql backed markov text generator.
133
+ test_files: []