shiner 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/shiner.rb ADDED
@@ -0,0 +1,57 @@
1
+ #require "shiner/version"
2
+
3
+ require 'rubygems'
4
+ require 'bundler/setup'
5
+ require 'pp'
6
+
7
+ require 'tactful_tokenizer'
8
+ require 'classifier'
9
+
10
# Shiner extracts the most "interesting" run of consecutive sentences from a
# piece of text. Sentences come from TactfulTokenizer and are scored with a
# Bayes classifier trained on data/interesting.txt and data/uninteresting.txt.
module Shiner
  # Returns the best-scoring run of consecutive sentences, joined by single
  # spaces.
  #
  # string  - the text to summarise.
  # options - Hash of limits:
  #           :max_length    - maximum length (in characters) of the result.
  #           :max_sentences - maximum number of sentences in the result.
  #           The 188-character default only applies when no options hash is
  #           passed at all; passing any hash (for example only
  #           :max_sentences) leaves the length unlimited unless :max_length
  #           is given explicitly.
  #
  # Returns a String; it is empty when the text yields no sentences or when
  # no sentence fits within :max_length.
  def self.shine(string, options={:max_length => 188})
    best = string_to_best_sentences(string, options)
    # No sentences at all: there is nothing to join.
    return '' unless best
    best[:sentences].map { |sentence| sentence[:sentence] }.join(' ')
  end

  # Builds one candidate run of consecutive sentences per starting position
  # and returns the candidate with the highest total score.
  #
  # string  - the text to analyse.
  # options - Hash with optional :max_sentences and :max_length (see .shine).
  #           The hash is only read, never modified.
  #
  # Returns a Hash {:sentences => [scored sentence hashes], :score => total},
  # or nil when the text yields no sentences. When several candidates share
  # the top score the earliest one in the text is returned.
  def self.string_to_best_sentences(string, options={})
    sentences = string_to_scored_sentences(string)
    # Work on locals so the caller's hash is left untouched (it may be reused
    # for another document, or frozen).
    max_sentences = options[:max_sentences] || sentences.size
    max_length = options[:max_length]

    batches = []
    sentences.each_index do |index|
      run = fit_to_length(sentences[index, max_sentences], max_length)
      batches << {:sentences => run, :score => run.map { |sentence| sentence[:score] }.sum}
    end

    # A run emptied by the length limit scores 0; it must not outrank real
    # runs whose scores happen to be negative. Fall back to the empty runs
    # only when nothing fits at all.
    candidates = batches.reject { |batch| batch[:sentences].empty? }
    candidates = batches if candidates.empty?
    candidates.max_by { |batch| batch[:score] }
  end

  # Splits the text into sentences and scores each one.
  #
  # Returns an Array of Hashes in document order, each with :sentence,
  # :classifications (the classifier's raw output) and :score.
  def self.string_to_scored_sentences(string)
    string_to_sentences(string).map do |sentence|
      classifications = classifier.classifications(sentence)
      {:sentence => sentence, :classifications => classifications,
       :score => interest_score(classifications)}
    end
  end

  # Lazily builds, trains and memoizes the Bayes classifier. Each line of
  # data/interesting.txt and data/uninteresting.txt is one training sample.
  def self.classifier
    @classifier ||= begin
      bayes = Classifier::Bayes.new 'Interesting', 'Uninteresting'
      data_dir = File.join(File.dirname(__FILE__), '..', 'data')
      File.read(File.join(data_dir, 'interesting.txt')).split("\n").each do |line|
        bayes.train_interesting line
      end
      File.read(File.join(data_dir, 'uninteresting.txt')).split("\n").each do |line|
        bayes.train_uninteresting line
      end
      bayes
    end
  end

  # Splits the text into sentences with a memoized TactfulTokenizer model.
  def self.string_to_sentences(string)
    @tactful_tokenizer ||= TactfulTokenizer::Model.new
    @tactful_tokenizer.tokenize_text(string)
  end

  # Returns the longest leading run of sentences whose space-joined text is
  # at most max_length characters long; everything is kept when max_length
  # is nil.
  def self.fit_to_length(sentences, max_length)
    return sentences unless max_length
    # Start at -1 so the separator counted with the first sentence cancels.
    joined_length = -1
    sentences.take_while do |sentence|
      joined_length += sentence[:sentence].length + 1
      joined_length <= max_length
    end
  end

  # Score of a sentence: how far 'Interesting' exceeds 'Uninteresting',
  # relative to the magnitude of 'Uninteresting'.
  #
  # A zero 'Uninteresting' value would make the ratio undefined; such a
  # sentence is scored as a neutral 0.0 instead of raising or yielding NaN.
  # NOTE(review): presumably the classifier reports zeros when a sentence
  # contains no usable words - confirm against the classifier gem.
  def self.interest_score(classifications)
    interesting = Float(classifications['Interesting'])
    uninteresting = Float(classifications['Uninteresting'])
    return 0.0 if uninteresting.zero?
    (interesting - uninteresting) / -uninteresting
  end

  private_class_method :fit_to_length, :interest_score
end
@@ -0,0 +1,3 @@
1
# Namespace of the shiner gem; this file only carries the release number.
module Shiner
  # Version string of this release.
  VERSION = '0.0.2'
end
data/shiner.gemspec ADDED
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/shiner/version', __FILE__)
3
+
4
# Gem specification for shiner. The packaged file list is taken from git, so
# the gem must be built from inside a git checkout.
Gem::Specification.new do |gem|
  gem.authors       = ["Jason Ling Xiaowei"]
  gem.email         = ["jason@jeyel.com"]
  gem.description   = 'Extract the most interesting sentences from an article.'
  gem.summary       = 'Extract the most interesting sentences from an article.'
  gem.homepage      = "https://github.com/jlxw/shiner"

  # Ask git for NUL-separated paths and split on NUL. Splitting on $\ (nil by
  # default) splits on any whitespace and breaks paths that contain spaces.
  gem.files         = `git ls-files -z`.split("\x0")
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "shiner"
  gem.require_paths = ["lib"]
  gem.version       = Shiner::VERSION

  # Runtime dependencies: sentence splitting and Bayes classification.
  gem.add_dependency "tactful_tokenizer"
  gem.add_dependency "classifier"
end
@@ -0,0 +1,63 @@
1
+ require 'test/unit'
2
+ require 'shiner'
3
+ #require 'active_support/testing/assertions'
4
+
5
+
6
# Smoke tests for Shiner, run against a real news article. They check the
# size limits of the summary rather than its exact wording, because the
# wording depends on the trained classifier.
class ShinerTest < Test::Unit::TestCase
  # Fixture article. The text is kept flush-left so the heredoc adds no
  # indentation to it.
  DOC = <<-EOF
Facebook shares have dipped below $29 for the first time since their flotation to a new low.

The shares were launched at $38 less than two weeks ago during its high-profile flotation, which valued the firm at $104bn (£66bn).

Since then, Facebook's shares have lost almost a quarter of their value.

Russia's biggest social network, VKontakte, has now postponed its stock market launch, fearing a repeat of Facebook's problems.

"The IPO of FB [Facebook] destroyed the faith of many private investors in social networks," said chief executive Pavel Durov in a message posted on Twitter.

Facebook shares closed down 9.6% to $28.82, just above its low of $28.78.

One reason for the fall in shares since its initial public offering (IPO) is that Tuesday was the first day that options on Facebook stock began trading.

Options are a form of derivative, that allow bets on the future direction of the stock. It appears that most investors are betting Facebook shares will head lower.

"Facebook's stock market debut is already going down as one of the most troubled of recent years," says BBC technology correspondent Rory Cellan-Jones.

The situation is a remarkable turnaround from recent weeks, when the eight-year-old firm's share sale was over-subscribed.
Lawsuits

The social networking site has transformed the way in which hundreds of millions of people around the world communicate. It is also transforming the way companies advertise to existing and potential customers.

But Facebook's 900 million users helped the company generate just $1bn in profit last year, and there are concerns about its ability to increase profits in the future.

The flotation was disrupted on its first day of trading by technical glitches on the Nasdaq stock exchange. The share price has since slumped amid worries that the company was over-valued by advisers marketing the float.

Now, a group of investors has issued a class-action lawsuit alleging that Facebook revenues were revised down because of a surge in the number of people using mobile devices for apps and connection to websites.

The suit targets Facebook, its founder Mark Zuckerberg and the banks behind the flotation, including lead underwriter Morgan Stanley.

The share sale in New York raised $16bn for Facebook.

Recent reports suggested that Facebook is to launch its own smartphone by next year.
  EOF

  # The summary must be substantial, yet never longer than :max_length.
  # A summary of exactly :max_length characters is allowed.
  def test_shine_respects_max_length
    [388, 888, 1888].each do |max_length|
      result = Shiner.shine(DOC, :max_length => max_length)
      assert result.size > 100, "summary too short for max_length #{max_length}"
      assert result.size <= max_length, "summary exceeds max_length #{max_length}"
    end
  end

  # With only :max_sentences given, the chosen run holds at most that many
  # sentences and the summary is not empty.
  def test_shine_respects_max_sentences
    [1, 2, 3].each do |max_sentences|
      best = Shiner.string_to_best_sentences(DOC, :max_sentences => max_sentences)
      assert best[:sentences].size <= max_sentences,
             "more than #{max_sentences} sentence(s) selected"
      result = Shiner.shine(DOC, :max_sentences => max_sentences)
      assert !result.empty?, "empty summary for max_sentences #{max_sentences}"
    end
  end
end
metadata ADDED
@@ -0,0 +1,104 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shiner
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 2
10
+ version: 0.0.2
11
+ platform: ruby
12
+ authors:
13
+ - Jason Ling Xiaowei
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2012-05-30 00:00:00 Z
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: tactful_tokenizer
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ hash: 3
29
+ segments:
30
+ - 0
31
+ version: "0"
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: classifier
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ hash: 3
43
+ segments:
44
+ - 0
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id002
48
+ description: Extract the most interesting sentences from an article.
49
+ email:
50
+ - jason@jeyel.com
51
+ executables: []
52
+
53
+ extensions: []
54
+
55
+ extra_rdoc_files: []
56
+
57
+ files:
58
+ - .gitignore
59
+ - Gemfile
60
+ - LICENSE
61
+ - README.md
62
+ - Rakefile
63
+ - data/README
64
+ - data/interesting.txt
65
+ - data/uninteresting.txt
66
+ - lib/shiner.rb
67
+ - lib/shiner/version.rb
68
+ - shiner.gemspec
69
+ - test/test_shiner.rb
70
+ homepage: https://github.com/jlxw/shiner
71
+ licenses: []
72
+
73
+ post_install_message:
74
+ rdoc_options: []
75
+
76
+ require_paths:
77
+ - lib
78
+ required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ hash: 3
84
+ segments:
85
+ - 0
86
+ version: "0"
87
+ required_rubygems_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ">="
91
+ - !ruby/object:Gem::Version
92
+ hash: 3
93
+ segments:
94
+ - 0
95
+ version: "0"
96
+ requirements: []
97
+
98
+ rubyforge_project:
99
+ rubygems_version: 1.8.13
100
+ signing_key:
101
+ specification_version: 3
102
+ summary: Extract the most interesting sentences from an article.
103
+ test_files:
104
+ - test/test_shiner.rb