twitter_ebooks 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -40,15 +40,8 @@ module Ebooks
40
40
  log "Tokenizing #{sentences.length} sentences"
41
41
  @sentences = sentences.map { |sent| NLP.tokenize(sent) }
42
42
 
43
- log "Building markov model"
44
- @markov = MarkovModel.build(@sentences)
45
-
46
43
  log "Ranking keywords"
47
- require 'benchmark'
48
- puts Benchmark.measure {
49
- @keywords = NLP.keywords(@sentences)
50
- p @keywords.top(100)
51
- }
44
+ @keywords = NLP.keywords(@sentences)
52
45
 
53
46
  self
54
47
  end
@@ -75,10 +68,10 @@ module Ebooks
75
68
  end
76
69
 
77
70
  def markov_statement(limit=140, markov=nil)
78
- markov ||= @markov
71
+ markov ||= MarkovModel.build(@sentences)
79
72
  tweet = ""
80
73
 
81
- while (tweet = markov.generate) do
74
+ while (tweet = markov.generate(@sentences)) do
82
75
  next if tweet.length > limit
83
76
  next if NLP.unmatched_enclosers?(tweet)
84
77
  break if tweet.length > limit*0.4 || rand > 0.8
@@ -113,9 +106,6 @@ module Ebooks
113
106
  # First try
114
107
  relevant, slightly_relevant = relevant_sentences(input)
115
108
 
116
- p relevant
117
- p slightly_relevant.length
118
-
119
109
  if relevant.length >= 3
120
110
  markov = MarkovModel.new.consume(relevant)
121
111
  markov_statement(limit, markov)
@@ -23,10 +23,6 @@ module Ebooks
23
23
  def self.adjectives
24
24
  @adjectives ||= File.read(File.join(DATA_PATH, 'adjectives.txt')).split
25
25
  end
26
-
27
- def self.wordfreq
28
- @wordfreq ||= JSON.load(File.read(File.join(DATA_PATH, 'wordfreq.json')))
29
- end
30
26
 
31
27
  # POS tagger
32
28
  def self.tagger
@@ -94,25 +90,6 @@ module Ebooks
94
90
  text.keywords
95
91
  end
96
92
 
97
- def self.stemset(sentence)
98
- tokens = sentence.is_a?(Array) ? sentence : tokenize(sentence)
99
- tokens.map(&:downcase)
100
- .reject { |token| stopwords.include?(token) }
101
- .map { |t| stemmer.stem(t) }
102
- .to_set
103
- end
104
-
105
- # Builds a token stem frequency map
106
- def self.stemfreq(sentences)
107
- freqmap = {}
108
- sentences.flatten.each do |token|
109
- stem = NLP.stem(token)
110
- freqmap[stem] ||= 0
111
- freqmap[stem] += 1
112
- end
113
- freqmap
114
- end
115
-
116
93
  # Takes a list of tokens and builds a nice-looking sentence
117
94
  def self.reconstruct(tokens)
118
95
  text = ""
@@ -1,3 +1,3 @@
1
1
  module Ebooks
2
- VERSION = "2.0.4"
2
+ VERSION = "2.0.5"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: twitter_ebooks
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -171,11 +171,9 @@ files:
171
171
  - README.md
172
172
  - Rakefile
173
173
  - bin/ebooks
174
- - data/ANC-all-count.txt
175
174
  - data/adjectives.txt
176
175
  - data/nouns.txt
177
176
  - data/stopwords.txt
178
- - data/wordfreq.json
179
177
  - lib/twitter_ebooks.rb
180
178
  - lib/twitter_ebooks/archiver.rb
181
179
  - lib/twitter_ebooks/bot.rb