charles 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,7 +16,7 @@ puts({
16
16
  :content => document.content,
17
17
  :interesting_content => document.interesting_content,
18
18
  :title => document.title,
19
- :filtered_images => document.filtered_images.collect{|image| image[:url]}
19
+ :filtered_images => document.filtered_images
20
20
  }.to_yaml)
21
21
 
22
22
 
@@ -43,6 +43,9 @@ module Charles
43
43
  end
44
44
 
45
45
  def filtered_images
46
+ filtered_images_extra.collect{|image| image[:url]}
47
+ end
48
+ def filtered_images_extra
46
49
  _max_proportion = 2.5
47
50
  _min_area = 88*88
48
51
  _filtered_images = []
@@ -33,8 +33,8 @@ module Charles
33
33
  _title_words[token]||=0; _title_words[token]+=1
34
34
  }
35
35
  }
36
- _threshold = (0.9 * @options[:sample_titles].size).ceil
37
- _words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than 90% of the titles
36
+ _threshold = (0.8 * @options[:sample_titles].size).ceil
37
+ _words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than 80% of the titles
38
38
  end
39
39
  end
40
40
  end
@@ -1,3 +1,3 @@
1
1
  module Charles
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -60,10 +60,11 @@ class CharlesTest < Test::Unit::TestCase
60
60
  article = TEST_ARTICLES.detect{|article| article[:url] == 'http://online.wsj.com/article/SB10001424052702303674004577433160886451978.html'}
61
61
  input = File.read("test/articles/#{article[:file]}.html")
62
62
  document = Charles::Document.new(input, :url => article[:url])
63
- assert document.filtered_images.size > 3
64
- assert document.filtered_images.last[:data].size > 1000
65
- assert document.filtered_images.last[:width] > 100
66
- assert document.filtered_images.last[:height] > 100
63
+ filtered_images_extra = document.filtered_images_extra
64
+ assert filtered_images_extra.size > 3
65
+ assert filtered_images_extra.last[:data].size > 1000
66
+ assert filtered_images_extra.last[:width] > 100
67
+ assert filtered_images_extra.last[:height] > 100
67
68
  end
68
69
 
69
70
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charles
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 5
10
- version: 0.0.5
9
+ - 6
10
+ version: 0.0.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jason Ling Xiaowei