markov_words 2.0.1 → 2.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 962d1783383aa1d75e121e932a2d1dbde2ef487a
4
- data.tar.gz: 677c629d528e959762d13b88073ed4a246d9a449
3
+ metadata.gz: f4c5006ece0a2f2eb6e4f30ac865ef48ba18ed80
4
+ data.tar.gz: dea0a20dc2d7f05f38f55821c7e88ad254aa5470
5
5
  SHA512:
6
- metadata.gz: 6dcbc277e2fdb99a202935e5f76fd607064cd89d9bf15b8a99959e1d8c27214620ef9ead098629cf02d79d9d977e588dbb090d589289dce5997a414a7516e703
7
- data.tar.gz: 60d870efaefdd70579e46c6984412ad2bc2bf20d326ac8f32b10d0b4a5037d9771306452347774fc1b28b4e738dfa6a6ca2ec5eb7b0604c44de610cbbbe6a5cb
6
+ metadata.gz: 1c6f73c9a170deae7f904635eb326b71effa6ed6f0a45da190f8b5e9009606956efd686e3de16f69ce0e5e9c4a4def2774d5b52186d2fbc0df48ca83de343584
7
+ data.tar.gz: 35edfedcbaa4501ab892def54cc213e641195c6bd4c7c7e52d3044bb8f2c7772a0439289958461f92c310c5760f8f3ba78ba7472a7daf3b4d95c5359df59b8f5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- markov_words (2.0.1)
4
+ markov_words (2.0.2)
5
5
  sqlite3 (~> 1.3)
6
6
 
7
7
  GEM
data/bin/benchmark CHANGED
@@ -5,80 +5,84 @@ require 'benchmark'
5
5
  require 'bundler/setup'
6
6
  require 'markov_words'
7
7
 
8
- # Utility class to generate benchmarks for MarkovWords
9
- class GeneratorBenchmark
10
- LABEL_WIDTH = 7
11
- def run
12
- test_if_desired 'initial database creation time versus gram size' do
13
- Benchmark.bm(LABEL_WIDTH) do |x|
14
- @min_gram_size.upto(@max_gram_size) do |size|
15
- generator =
16
- MarkovWords::Generator.new(flush_data: true,
17
- gram_size: size,
18
- corpus_file: @corpus_file)
19
- x.report("size: #{size}") { generator.word }
20
- end
21
- end
22
- end
8
+ if ARGV.empty?
9
+ puts 'USAGE: bin/benchmark min_gram_size max_gram_size corpus_file'
10
+ puts 'EXAMPLE: bin/benchmark 2 6 /usr/share/dict/words'
11
+ end
23
12
 
24
- test_if_desired 'existing database on disk, initial memory load' do
25
- Benchmark.bm(LABEL_WIDTH) do |x|
26
- @min_gram_size.upto(@max_gram_size) do |size|
27
- generator =
28
- MarkovWords::Generator.new(flush_data: true,
29
- gram_size: size,
30
- corpus_file: @corpus_file)
31
- _word = generator.word # this will run initial setup
32
- generator_load_data_from_file =
33
- MarkovWords::Generator.new(gram_size: size,
34
- corpus_file: @corpus_file)
35
- x.report("size: #{size}") { generator_load_data_from_file.word }
36
- end
37
- end
38
- end
13
+ @min_gram_size = ARGV[0].to_i || 1
14
+ @max_gram_size = ARGV[1].to_i || 6
15
+ @corpus_file = ARGV[2] || '/usr/share/dict/words'
16
+ @label_width = 7
39
17
 
40
- test_if_desired 'word generation averages for 100 words per gram size' do
41
- Benchmark.bm(LABEL_WIDTH) do |x|
18
+ puts "Minimum n-gram size set to #{@min_gram_size}"
19
+ puts "Maximum n-gram size set to #{@max_gram_size}"
20
+ puts "Corpus file set to #{@corpus_file}"
21
+
22
+ def print_separator
23
+ printf "%s\n", Array.new(70).map { '-' }.join
24
+ end
25
+
26
+ def test_if_desired(description, **opts)
27
+ printf "\n%s", "Test #{description}? (y/n) "
28
+ if /y/.match?($stdin.readline)
29
+ print_separator
30
+ if opts.fetch :benchmark, true
31
+ Benchmark.bm(@label_width) do |report|
42
32
  @min_gram_size.upto(@max_gram_size) do |size|
43
- generator =
44
- MarkovWords::Generator.new(flush_data: true,
45
- gram_size: size,
46
- perform_caching: false,
47
- corpus_file: @corpus_file)
48
- _word = generator.word # this will run initial setup
49
- x.report("size: #{size}") { 1.upto(100) { generator.word } }
33
+ yield(report, size) if block_given?
50
34
  end
51
35
  end
36
+ else
37
+ @min_gram_size.upto(@max_gram_size) do |size|
38
+ yield(size) if block_given?
39
+ end
52
40
  end
41
+ print_separator
53
42
  end
43
+ end
54
44
 
55
- def initialize(opts)
56
- @min_gram_size = opts.fetch :min_gram_size, 1
57
- @max_gram_size = opts.fetch :max_gram_size, 6
58
- @corpus_file = opts.fetch :corpus_file, '/usr/share/dict/words'
59
- puts "Minimum n-gram size set to #{@min_gram_size}"
60
- puts "Maximum n-gram size set to #{@max_gram_size}"
61
- puts "Corpus file set to #{@corpus_file}"
62
- end
45
+ def new_generator(**opts)
46
+ opts = {
47
+ flush_data: true,
48
+ corpus_file: @corpus_file
49
+ }.merge(opts)
63
50
 
64
- def print_separator
65
- printf "%s\n", Array.new(60).map { '-' }.join
66
- end
51
+ generator = MarkovWords::Generator.new(opts)
52
+ _word = generator.word if opts.fetch(:pre_seed, false)
67
53
 
68
- def test_if_desired(description, &block)
69
- printf "\n%s", "Test #{description}? (y/n) "
70
- if /y/.match?($stdin.readline)
71
- print_separator
72
- yield(block)
73
- print_separator
74
- end
75
- end
54
+ generator
76
55
  end
77
56
 
78
- if ARGV.empty?
79
- puts "USAGE: bin/benchmark min_gram_size max_gram_size corpus_file\n"
57
+ report_name = 'initial database creation time versus gram size'
58
+ test_if_desired report_name do |report, size|
59
+ generator = new_generator(gram_size: size)
60
+ report.report("size: #{size}") { generator.word }
61
+ end
62
+
63
+ report_name = 'existing database on disk, initial memory load'
64
+ test_if_desired report_name do |report, size|
65
+ new_generator(pre_seed: true, gram_size: size)
66
+ generator_existing_db = new_generator(flush_data: false, gram_size: size)
67
+ report.report("size: #{size}") { generator_existing_db.word }
68
+ end
69
+
70
+ report_name = 'word generation averages for 100 words per gram size'
71
+ test_if_desired report_name do |report, size|
72
+ generator = new_generator(pre_seed: true, gram_size: size)
73
+ report.report("size: #{size}") { 1.upto(100) { generator.word } }
74
+ end
75
+
76
+ test_if_desired 'word uniqueness per gram size', benchmark: false do |size|
77
+ generator = new_generator(gram_size: size)
78
+ num_words = 6000
79
+ seen_words = {}
80
+
81
+ num_words.times do
82
+ word = generator.word
83
+ seen_words[word].nil? ? seen_words[word] = 0 : seen_words[word] += 1
84
+ end
85
+ num_dupes = seen_words.find_all{ |_key, val| val > 1 }.length
86
+
87
+ puts "n-gram size: #{size}, #{num_dupes}/#{num_words} words were duplicates."
80
88
  end
81
- bm = GeneratorBenchmark.new(min_gram_size: ARGV[0].to_i,
82
- max_gram_size: ARGV[1].to_i,
83
- corpus_file: ARGV[2])
84
- bm.run
@@ -2,5 +2,5 @@
2
2
 
3
3
  module MarkovWords
4
4
  # Current version
5
- VERSION = '2.0.1'
5
+ VERSION = '2.0.2'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markov_words
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Donald Merand