twitter_ebooks 2.0.4 → 2.0.5

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -40,15 +40,8 @@ module Ebooks
40
40
  log "Tokenizing #{sentences.length} sentences"
41
41
  @sentences = sentences.map { |sent| NLP.tokenize(sent) }
42
42
 
43
- log "Building markov model"
44
- @markov = MarkovModel.build(@sentences)
45
-
46
43
  log "Ranking keywords"
47
- require 'benchmark'
48
- puts Benchmark.measure {
49
- @keywords = NLP.keywords(@sentences)
50
- p @keywords.top(100)
51
- }
44
+ @keywords = NLP.keywords(@sentences)
52
45
 
53
46
  self
54
47
  end
@@ -75,10 +68,10 @@ module Ebooks
75
68
  end
76
69
 
77
70
  def markov_statement(limit=140, markov=nil)
78
- markov ||= @markov
71
+ markov ||= MarkovModel.build(@sentences)
79
72
  tweet = ""
80
73
 
81
- while (tweet = markov.generate) do
74
+ while (tweet = markov.generate(@sentences)) do
82
75
  next if tweet.length > limit
83
76
  next if NLP.unmatched_enclosers?(tweet)
84
77
  break if tweet.length > limit*0.4 || rand > 0.8
@@ -113,9 +106,6 @@ module Ebooks
113
106
  # First try
114
107
  relevant, slightly_relevant = relevant_sentences(input)
115
108
 
116
- p relevant
117
- p slightly_relevant.length
118
-
119
109
  if relevant.length >= 3
120
110
  markov = MarkovModel.new.consume(relevant)
121
111
  markov_statement(limit, markov)
@@ -23,10 +23,6 @@ module Ebooks
23
23
  def self.adjectives
24
24
  @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
25
25
  end
26
-
27
- def self.wordfreq
28
- @wordfreq ||= JSON.load(File.read(File.join(DATA_PATH, 'wordfreq.json')))
29
- end
30
26
 
31
27
  # POS tagger
32
28
  def self.tagger
@@ -94,25 +90,6 @@ module Ebooks
94
90
  text.keywords
95
91
  end
96
92
 
97
- def self.stemset(sentence)
98
- tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
99
- tokens.map(&:downcase)
100
- .reject { |token| stopwords.include?(token) }
101
- .map { |t| stemmer.stem(t) }
102
- .to_set
103
- end
104
-
105
- # Builds a token stem frequency map
106
- def self.stemfreq(sentences)
107
- freqmap = {}
108
- sentences.flatten.each do |token|
109
- stem = NLP.stem(token)
110
- freqmap[stem] ||= 0
111
- freqmap[stem] += 1
112
- end
113
- freqmap
114
- end
115
-
116
93
  # Takes a list of tokens and builds a nice-looking sentence
117
94
  def self.reconstruct(tokens)
118
95
  text = ""
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "2.0.4"
2
+ VERSION = "2.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -171,11 +171,9 @@ files:
171
171
  - README.md
172
172
  - Rakefile
173
173
  - bin/ebooks
174
- - data/ANC-all-count.txt
175
174
  - data/adjectives.txt
176
175
  - data/nouns.txt
177
176
  - data/stopwords.txt
178
- - data/wordfreq.json
179
177
  - lib/twitter_ebooks.rb
180
178
  - lib/twitter_ebooks/archiver.rb
181
179
  - lib/twitter_ebooks/bot.rb