charles 0.0.5 → 0.0.6

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -16,7 +16,7 @@ puts({
16
16
  :content => document.content,
17
17
  :interesting_content => document.interesting_content,
18
18
  :title => document.title,
19
- :filtered_images => document.filtered_images.collect{|image| image[:url]}
19
+ :filtered_images => document.filtered_images
20
20
  }.to_yaml)
21
21
 
22
22
 
@@ -43,6 +43,9 @@ module Charles
43
43
  end
44
44
 
45
45
  def filtered_images
46
+ filtered_images_extra.collect{|image| image[:url]}
47
+ end
48
+ def filtered_images_extra
46
49
  _max_proportion = 2.5
47
50
  _min_area = 88*88
48
51
  _filtered_images = []
@@ -33,8 +33,8 @@ module Charles
33
33
  _title_words[token]||=0; _title_words[token]+=1
34
34
  }
35
35
  }
36
- _threshold = (0.9 * @options[:sample_titles].size).ceil
37
- _words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than 90% of the titles
36
+ _threshold = (0.8 * @options[:sample_titles].size).ceil
37
+ _words_to_filter = _title_words.select{|k,v| v >= _threshold}.collect{|k,v| k} #select words used in more than 80% of the titles
38
38
  end
39
39
  end
40
40
  end
@@ -1,3 +1,3 @@
1
1
  module Charles
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
@@ -60,10 +60,11 @@ class CharlesTest < Test::Unit::TestCase
60
60
  article = TEST_ARTICLES.detect{|article| article[:url] == 'http://online.wsj.com/article/SB10001424052702303674004577433160886451978.html'}
61
61
  input = File.read("test/articles/#{article[:file]}.html")
62
62
  document = Charles::Document.new(input, :url => article[:url])
63
- assert document.filtered_images.size > 3
64
- assert document.filtered_images.last[:data].size > 1000
65
- assert document.filtered_images.last[:width] > 100
66
- assert document.filtered_images.last[:height] > 100
63
+ filtered_images_extra = document.filtered_images_extra
64
+ assert filtered_images_extra.size > 3
65
+ assert filtered_images_extra.last[:data].size > 1000
66
+ assert filtered_images_extra.last[:width] > 100
67
+ assert filtered_images_extra.last[:height] > 100
67
68
  end
68
69
 
69
70
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: charles
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 5
10
- version: 0.0.5
9
+ - 6
10
+ version: 0.0.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jason Ling Xiaowei