chomchom 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.DS_Store +0 -0
- data/chomchom.gemspec +1 -1
- data/lib/.DS_Store +0 -0
- data/lib/chomchom/.DS_Store +0 -0
- data/lib/chomchom/scorer.rb +6 -6
- data/lib/chomchom/version.rb +1 -1
- data/tests/.DS_Store +0 -0
- data/tests/scoring.rb +5 -7
- metadata +3 -3
data/.DS_Store
CHANGED
Binary file
|
data/chomchom.gemspec
CHANGED
data/lib/.DS_Store
CHANGED
Binary file
|
data/lib/chomchom/.DS_Store
CHANGED
Binary file
|
data/lib/chomchom/scorer.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
#coding: utf-8
|
2
|
-
require '
|
2
|
+
require 'grammar_cop'
|
3
3
|
require 'lingua/stemmer' #https://github.com/aurelian/ruby-stemmer
|
4
4
|
|
5
5
|
module Chomchom
|
@@ -45,7 +45,7 @@ module Chomchom
|
|
45
45
|
ss = summary.downcase.split_sentences.each { |s|
|
46
46
|
#take a flat 30% for every copied sentence
|
47
47
|
copy_taxes.push(0.3) if text.downcase.index(s.gsub(/[^\p{Word}]+$/,'').gsub(/^[^\p{Word}]+/,''))
|
48
|
-
proper_sentences += 1 if
|
48
|
+
proper_sentences += 1 if GrammarCop::Sentence.count_linkages(s, dictionary, parse_options) > 0
|
49
49
|
coverages.push(find_coverages(s, tss, stem_topics)) if s.match(/\p{Word}+/)
|
50
50
|
}
|
51
51
|
|
@@ -86,17 +86,17 @@ module Chomchom
|
|
86
86
|
|
87
87
|
private
|
88
88
|
#progressive length tax
|
89
|
-
#max = .
|
89
|
+
#max = .02 + .04 + .06 = .12 (12.0%)
|
90
90
|
#no penalty for short summaries b/c a short summary already can't cover as much
|
91
91
|
def length_tax(summary_size)
|
92
92
|
if summary_size <= 100
|
93
93
|
0
|
94
94
|
elsif summary_size <= 200
|
95
|
-
(summary_size-100)*0.
|
95
|
+
(summary_size-100)*0.0002
|
96
96
|
elsif summary_size <= 300
|
97
|
-
0.
|
97
|
+
0.02 + (summary_size-200)*0.0004
|
98
98
|
else
|
99
|
-
0.
|
99
|
+
0.02 + 0.04 + (summary_size-300)*0.0006
|
100
100
|
end
|
101
101
|
end
|
102
102
|
|
data/lib/chomchom/version.rb
CHANGED
data/tests/.DS_Store
CHANGED
Binary file
|
data/tests/scoring.rb
CHANGED
@@ -18,10 +18,10 @@ agent = Mechanize.new
|
|
18
18
|
agent.user_agent = "chomchom request client"
|
19
19
|
fake_summary = "A look into Al Jazeera's difficulties in breaking into the US market, because they are the terrorist mouthpiece post 9/11. However, there are a handful of small cable companies who are unpatriotic by broadcasting it, and it hasn't been well received for its fresh and informative worldview coverage."
|
20
20
|
|
21
|
-
dict =
|
22
|
-
opts =
|
23
|
-
|
24
|
-
|
21
|
+
dict = GrammarCop::Dictionary.create("en")
|
22
|
+
opts = GrammarCop::ParseOptions.create
|
23
|
+
GrammarCop::ParseOptions.set_linkage_limit(opts, 10)
|
24
|
+
GrammarCop::ParseOptions.set_short_length(opts, 5)
|
25
25
|
urls.each_with_index do |url, i|
|
26
26
|
if i==i
|
27
27
|
agent.get(url)
|
@@ -47,6 +47,4 @@ urls.each_with_index do |url, i|
|
|
47
47
|
puts passage = doc.center_of_gravity()
|
48
48
|
puts scorer.score(text, passage, topic_words, dict, opts)
|
49
49
|
end
|
50
|
-
end
|
51
|
-
GrammarPolice::ParseOptions.parse_options_delete(opts)
|
52
|
-
GrammarPolice::Dictionary.destroy(dict)
|
50
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: chomchom
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.
|
5
|
+
version: 0.5.0
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Quan Nguyen
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-05-
|
13
|
+
date: 2011-05-08 00:00:00 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: mechanize
|
@@ -79,7 +79,7 @@ dependencies:
|
|
79
79
|
type: :runtime
|
80
80
|
version_requirements: *id006
|
81
81
|
- !ruby/object:Gem::Dependency
|
82
|
-
name:
|
82
|
+
name: grammar_cop
|
83
83
|
prerelease: false
|
84
84
|
requirement: &id007 !ruby/object:Gem::Requirement
|
85
85
|
none: false
|