markov_words 2.0.1 → 2.0.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 962d1783383aa1d75e121e932a2d1dbde2ef487a
4
- data.tar.gz: 677c629d528e959762d13b88073ed4a246d9a449
3
+ metadata.gz: f4c5006ece0a2f2eb6e4f30ac865ef48ba18ed80
4
+ data.tar.gz: dea0a20dc2d7f05f38f55821c7e88ad254aa5470
5
5
  SHA512:
6
- metadata.gz: 6dcbc277e2fdb99a202935e5f76fd607064cd89d9bf15b8a99959e1d8c27214620ef9ead098629cf02d79d9d977e588dbb090d589289dce5997a414a7516e703
7
- data.tar.gz: 60d870efaefdd70579e46c6984412ad2bc2bf20d326ac8f32b10d0b4a5037d9771306452347774fc1b28b4e738dfa6a6ca2ec5eb7b0604c44de610cbbbe6a5cb
6
+ metadata.gz: 1c6f73c9a170deae7f904635eb326b71effa6ed6f0a45da190f8b5e9009606956efd686e3de16f69ce0e5e9c4a4def2774d5b52186d2fbc0df48ca83de343584
7
+ data.tar.gz: 35edfedcbaa4501ab892def54cc213e641195c6bd4c7c7e52d3044bb8f2c7772a0439289958461f92c310c5760f8f3ba78ba7472a7daf3b4d95c5359df59b8f5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- markov_words (2.0.1)
4
+ markov_words (2.0.2)
5
5
  sqlite3 (~> 1.3)
6
6
 
7
7
  GEM
data/bin/benchmark CHANGED
@@ -5,80 +5,84 @@ require 'benchmark'
5
5
  require 'bundler/setup'
6
6
  require 'markov_words'
7
7
 
8
- # Utility class to generate benchmarks for MarkovWords
9
- class GeneratorBenchmark
10
- LABEL_WIDTH = 7
11
- def run
12
- test_if_desired 'initial database creation time versus gram size' do
13
- Benchmark.bm(LABEL_WIDTH) do |x|
14
- @min_gram_size.upto(@max_gram_size) do |size|
15
- generator =
16
- MarkovWords::Generator.new(flush_data: true,
17
- gram_size: size,
18
- corpus_file: @corpus_file)
19
- x.report("size: #{size}") { generator.word }
20
- end
21
- end
22
- end
8
+ if ARGV.empty?
9
+ puts 'USAGE: bin/benchmark min_gram_size max_gram_size corpus_file'
10
+ puts 'EXAMPLE: bin/benchmark 2 6 /usr/share/dict/words'
11
+ end
23
12
 
24
- test_if_desired 'existing database on disk, initial memory load' do
25
- Benchmark.bm(LABEL_WIDTH) do |x|
26
- @min_gram_size.upto(@max_gram_size) do |size|
27
- generator =
28
- MarkovWords::Generator.new(flush_data: true,
29
- gram_size: size,
30
- corpus_file: @corpus_file)
31
- _word = generator.word # this will run initial setup
32
- generator_load_data_from_file =
33
- MarkovWords::Generator.new(gram_size: size,
34
- corpus_file: @corpus_file)
35
- x.report("size: #{size}") { generator_load_data_from_file.word }
36
- end
37
- end
38
- end
13
+ @min_gram_size = ARGV[0].to_i || 1
14
+ @max_gram_size = ARGV[1].to_i || 6
15
+ @corpus_file = ARGV[2] || '/usr/share/dict/words'
16
+ @label_width = 7
39
17
 
40
- test_if_desired 'word generation averages for 100 words per gram size' do
41
- Benchmark.bm(LABEL_WIDTH) do |x|
18
+ puts "Minimum n-gram size set to #{@min_gram_size}"
19
+ puts "Maximum n-gram size set to #{@max_gram_size}"
20
+ puts "Corpus file set to #{@corpus_file}"
21
+
22
+ def print_separator
23
+ printf "%s\n", Array.new(70).map { '-' }.join
24
+ end
25
+
26
+ def test_if_desired(description, **opts)
27
+ printf "\n%s", "Test #{description}? (y/n) "
28
+ if /y/.match?($stdin.readline)
29
+ print_separator
30
+ if opts.fetch :benchmark, true
31
+ Benchmark.bm(@label_width) do |report|
42
32
  @min_gram_size.upto(@max_gram_size) do |size|
43
- generator =
44
- MarkovWords::Generator.new(flush_data: true,
45
- gram_size: size,
46
- perform_caching: false,
47
- corpus_file: @corpus_file)
48
- _word = generator.word # this will run initial setup
49
- x.report("size: #{size}") { 1.upto(100) { generator.word } }
33
+ yield(report, size) if block_given?
50
34
  end
51
35
  end
36
+ else
37
+ @min_gram_size.upto(@max_gram_size) do |size|
38
+ yield(size) if block_given?
39
+ end
52
40
  end
41
+ print_separator
53
42
  end
43
+ end
54
44
 
55
- def initialize(opts)
56
- @min_gram_size = opts.fetch :min_gram_size, 1
57
- @max_gram_size = opts.fetch :max_gram_size, 6
58
- @corpus_file = opts.fetch :corpus_file, '/usr/share/dict/words'
59
- puts "Minimum n-gram size set to #{@min_gram_size}"
60
- puts "Maximum n-gram size set to #{@max_gram_size}"
61
- puts "Corpus file set to #{@corpus_file}"
62
- end
45
+ def new_generator(**opts)
46
+ opts = {
47
+ flush_data: true,
48
+ corpus_file: @corpus_file
49
+ }.merge(opts)
63
50
 
64
- def print_separator
65
- printf "%s\n", Array.new(60).map { '-' }.join
66
- end
51
+ generator = MarkovWords::Generator.new(opts)
52
+ _word = generator.word if opts.fetch(:pre_seed, false)
67
53
 
68
- def test_if_desired(description, &block)
69
- printf "\n%s", "Test #{description}? (y/n) "
70
- if /y/.match?($stdin.readline)
71
- print_separator
72
- yield(block)
73
- print_separator
74
- end
75
- end
54
+ generator
76
55
  end
77
56
 
78
- if ARGV.empty?
79
- puts "USAGE: bin/benchmark min_gram_size max_gram_size corpus_file\n"
57
+ report_name = 'initial database creation time versus gram size'
58
+ test_if_desired report_name do |report, size|
59
+ generator = new_generator(gram_size: size)
60
+ report.report("size: #{size}") { generator.word }
61
+ end
62
+
63
+ report_name = 'existing database on disk, initial memory load'
64
+ test_if_desired report_name do |report, size|
65
+ new_generator(pre_seed: true, gram_size: size)
66
+ generator_existing_db = new_generator(flush_data: false, gram_size: size)
67
+ report.report("size: #{size}") { generator_existing_db.word }
68
+ end
69
+
70
+ report_name = 'word generation averages for 100 words per gram size'
71
+ test_if_desired report_name do |report, size|
72
+ generator = new_generator(pre_seed: true, gram_size: size)
73
+ report.report("size: #{size}") { 1.upto(100) { generator.word } }
74
+ end
75
+
76
+ test_if_desired 'word uniqueness per gram size', benchmark: false do |size|
77
+ generator = new_generator(gram_size: size)
78
+ num_words = 6000
79
+ seen_words = {}
80
+
81
+ num_words.times do
82
+ word = generator.word
83
+ seen_words[word].nil? ? seen_words[word] = 0 : seen_words[word] += 1
84
+ end
85
+ num_dupes = seen_words.find_all{ |_key, val| val > 1 }.length
86
+
87
+ puts "n-gram size: #{size}, #{num_dupes}/#{num_words} words were duplicates."
80
88
  end
81
- bm = GeneratorBenchmark.new(min_gram_size: ARGV[0].to_i,
82
- max_gram_size: ARGV[1].to_i,
83
- corpus_file: ARGV[2])
84
- bm.run
@@ -2,5 +2,5 @@
2
2
 
3
3
  module MarkovWords
4
4
  # Current version
5
- VERSION = '2.0.1'
5
+ VERSION = '2.0.2'
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: markov_words
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Donald Merand