charles 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/charles +1 -1
- data/lib/charles/images.rb +3 -0
- data/lib/charles/internal_attributes.rb +2 -2
- data/lib/charles/version.rb +1 -1
- data/test/test_charles.rb +5 -4
- metadata +3 -3
data/bin/charles
CHANGED
@@ -16,7 +16,7 @@ puts({
|
|
16
16
|
:content => document.content,
|
17
17
|
:interesting_content => document.interesting_content,
|
18
18
|
:title => document.title,
|
19
|
-
:filtered_images => document.filtered_images
|
19
|
+
:filtered_images => document.filtered_images
|
20
20
|
}.to_yaml)
|
21
21
|
|
22
22
|
|
data/lib/charles/images.rb
CHANGED
@@ -33,8 +33,8 @@ module Charles
|
|
33
33
|
_title_words[token]||=0; _title_words[token]+=1
|
34
34
|
}
|
35
35
|
}
|
36
|
-
_threshold = (0.
|
37
|
-
_words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than
|
36
|
+
_threshold = (0.8 * @options[:sample_titles].size).ceil
|
37
|
+
_words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than 80% of the titles
|
38
38
|
end
|
39
39
|
end
|
40
40
|
end
|
data/lib/charles/version.rb
CHANGED
data/test/test_charles.rb
CHANGED
@@ -60,10 +60,11 @@ class CharlesTest < Test::Unit::TestCase
|
|
60
60
|
article = TEST_ARTICLES.detect{|article| article[:url] == 'http://online.wsj.com/article/SB10001424052702303674004577433160886451978.html'}
|
61
61
|
input = File.read("test/articles/#{article[:file]}.html")
|
62
62
|
document = Charles::Document.new(input, :url => article[:url])
|
63
|
-
|
64
|
-
assert
|
65
|
-
assert
|
66
|
-
assert
|
63
|
+
filtered_images_extra = document.filtered_images_extra
|
64
|
+
assert filtered_images_extra.size > 3
|
65
|
+
assert filtered_images_extra.last[:data].size > 1000
|
66
|
+
assert filtered_images_extra.last[:width] > 100
|
67
|
+
assert filtered_images_extra.last[:height] > 100
|
67
68
|
end
|
68
69
|
|
69
70
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: charles
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 5
|
10
|
-
version: 0.0.5
|
9
|
+
- 6
|
10
|
+
version: 0.0.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Jason Ling Xiaowei
|