linkterm 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/License.txt +20 -0
- data/Manifest.txt +16 -0
- data/README.txt +24 -0
- data/Rakefile +123 -0
- data/lib/linkterm/version.rb +9 -0
- data/lib/linkterm.rb +212 -0
- data/scripts/txt2html +67 -0
- data/setup.rb +1585 -0
- data/test/test_helper.rb +2 -0
- data/test/test_linkterm.rb +11 -0
- data/website/index.html +92 -0
- data/website/index.txt +38 -0
- data/website/javascripts/rounded_corners_lite.inc.js +285 -0
- data/website/stylesheets/screen.css +138 -0
- data/website/template.rhtml +48 -0
- metadata +67 -0
data/History.txt
ADDED
data/License.txt
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007 Linkterm
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Manifest.txt
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
History.txt
|
2
|
+
License.txt
|
3
|
+
Manifest.txt
|
4
|
+
README.txt
|
5
|
+
Rakefile
|
6
|
+
lib/linkterm.rb
|
7
|
+
lib/linkterm/version.rb
|
8
|
+
scripts/txt2html
|
9
|
+
setup.rb
|
10
|
+
test/test_helper.rb
|
11
|
+
test/test_linkterm.rb
|
12
|
+
website/index.html
|
13
|
+
website/index.txt
|
14
|
+
website/javascripts/rounded_corners_lite.inc.js
|
15
|
+
website/stylesheets/screen.css
|
16
|
+
website/template.rhtml
|
data/README.txt
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
README for linkterm
|
2
|
+
===================
|
3
|
+
|
4
|
+
== INSTALL
|
5
|
+
ruby-mecabを使っているため、以下のものを事前にインストールする必要があります。
|
6
|
+
|
7
|
+
* mecab
|
8
|
+
* rb-mecab
|
9
|
+
|
10
|
+
これらのインストールが完了すると、Rubyで
|
11
|
+
require 'MeCab'できるようになります。
|
12
|
+
|
13
|
+
OS Xでのインストール方法は
|
14
|
+
http://ujihisa.nowa.jp/entry/0da2db2747
|
15
|
+
にあります。
|
16
|
+
|
17
|
+
== How to use
|
18
|
+
コマンドラインインタフェースを用意しています。
|
19
|
+
example/example.rbを実行して、適当なキーワードを入力してください。
|
20
|
+
|
21
|
+
example.rbはexample/docs以下のファイルから知識を得、
|
22
|
+
その知識に基づいて入力されたキーワードに対応する
|
23
|
+
ルールを出力します。
|
24
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
require 'rake/clean'
|
4
|
+
require 'rake/testtask'
|
5
|
+
require 'rake/packagetask'
|
6
|
+
require 'rake/gempackagetask'
|
7
|
+
require 'rake/rdoctask'
|
8
|
+
require 'rake/contrib/rubyforgepublisher'
|
9
|
+
require 'fileutils'
|
10
|
+
require 'hoe'
|
11
|
+
|
12
|
+
include FileUtils
|
13
|
+
require File.join(File.dirname(__FILE__), 'lib', 'linkterm', 'version')
|
14
|
+
|
15
|
+
AUTHOR = 'Linkterm' # can also be an array of Authors
|
16
|
+
EMAIL = "ujihisa gmail com"
|
17
|
+
DESCRIPTION = "description of gem"
|
18
|
+
GEM_NAME = 'linkterm' # what ppl will type to install your gem
|
19
|
+
|
20
|
+
require 'yaml'

# Location of the per-user RubyForge configuration read by rubyforge_username.
@config_file = "~/.rubyforge/user-config.yml"
# Parsed contents of @config_file; loaded lazily on first use.
@config = nil

# Returns the "username" entry of the RubyForge user configuration.
#
# The YAML file named by @config_file is read and parsed the first time the
# method is called; the parsed config and the username are cached in instance
# variables afterwards.
#
# If the file cannot be read or parsed, an explanatory message is printed and
# the process exits with a non-zero status so that callers (rake, shell
# scripts) can see that the task failed.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
|
37
|
+
|
38
|
+
RUBYFORGE_PROJECT = 'linkterm' # The unix name for your project
|
39
|
+
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
|
40
|
+
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
|
41
|
+
|
42
|
+
NAME = "linkterm"
|
43
|
+
REV = nil
|
44
|
+
# UNCOMMENT IF REQUIRED:
|
45
|
+
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
|
46
|
+
VERS = Linkterm::VERSION::STRING + (REV ? ".#{REV}" : "")
|
47
|
+
CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
|
48
|
+
RDOC_OPTS = ['--quiet', '--title', 'linkterm documentation',
|
49
|
+
"--opname", "index.html",
|
50
|
+
"--line-numbers",
|
51
|
+
"--main", "README",
|
52
|
+
"--inline-source"]
|
53
|
+
|
54
|
+
# Reopens Hoe so that extra_deps never reports a dependency named 'hoe'.
class Hoe
  # Returns @extra_deps without the entries whose name is 'hoe'.
  # An entry may be a bare name or an array whose first element is the name.
  def extra_deps
    @extra_deps.select do |dependency|
      dependency_name = Array(dependency).first
      dependency_name != 'hoe'
    end
  end
end
|
59
|
+
|
60
|
+
# Generate all the Rake tasks
|
61
|
+
# Run 'rake -T' to see list of generated tasks (from gem root directory)
|
62
|
+
# Build the Hoe specification; Hoe generates the rake tasks from it.
# Run 'rake -T' from the gem root directory to list the generated tasks.
hoe = Hoe.new(GEM_NAME, VERS) do |spec|
  spec.author = AUTHOR
  spec.description = DESCRIPTION
  spec.email = EMAIL
  spec.summary = DESCRIPTION
  spec.url = HOMEPATH
  if RUBYFORGE_PROJECT
    spec.rubyforge_name = RUBYFORGE_PROJECT
  end
  spec.test_globs = ["test/**/test_*.rb"]
  # File patterns removed by the clean task.
  spec.clean_globs |= CLEAN

  # == Optional
  spec.changes = spec.paragraphs_of("History.txt", 0..1).join("\n\n")
  #spec.extra_deps = []     # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
  #spec.spec_extras = {}    # A hash of extra values to set in the gemspec.
end

# First two paragraphs of History.txt, used as the suggested tag comment.
CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")

# Project path on RubyForge: just the project name when it equals the gem
# name, otherwise "<project>/<gem>".
PATH = if RUBYFORGE_PROJECT == GEM_NAME
         RUBYFORGE_PROJECT
       else
         "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
       end

# The rdoc directory is PATH with the leading project name removed, plus 'rdoc'.
path_without_project = PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,'')
hoe.remote_rdoc_dir = File.join(path_without_project, 'rdoc')
|
81
|
+
|
82
|
+
desc 'Generate website files'
task :website_generate do
  # Render every website/**/*.txt next to itself as .html via scripts/txt2html.
  Dir.glob('website/**/*.txt').each do |source|
    target = source.gsub(/txt$/,'html')
    sh %{ ruby scripts/txt2html #{source} > #{target} }
  end
end

desc 'Upload website files to rubyforge'
task :website_upload do
  # rsync the local website/ directory into the project's RubyForge web root.
  destination = "#{rubyforge_username}@rubyforge.org:/var/www/gforge-projects/#{PATH}/"
  sh %{rsync -aCv website/ #{destination}}
end

desc 'Generate and upload website files'
task :website => [:website_generate, :website_upload, :publish_docs]

desc 'Release the website and new gem version'
task :deploy => [:check_version, :website, :release] do
  # Only prints the suggested svn tagging command; it is not executed.
  repository = "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}"
  puts "Remember to create SVN tag:"
  puts "svn copy #{repository}/trunk #{repository}/tags/REL-#{VERS} "
  puts "Suggested comment:"
  puts "Tagging release #{CHANGES}"
end

desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
task :local_deploy => [:website_generate, :install_gem]
|
111
|
+
|
112
|
+
# Guards a release: VERSION=x.y.z must be passed in the environment and must
# equal VERS (the version built from version.rb). On failure the message goes
# to stderr and rake exits with a non-zero status, so a failed check is not
# reported to the shell as success.
task :check_version do
  unless ENV['VERSION']
    abort 'Must pass a VERSION=x.y.z release version'
  end
  unless ENV['VERSION'] == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
|
122
|
+
|
123
|
+
|
data/lib/linkterm.rb
ADDED
@@ -0,0 +1,212 @@
|
|
1
|
+
$KCODE = 'u'
|
2
|
+
require 'MeCab'
|
3
|
+
require 'pp'
|
4
|
+
require 'set'
|
5
|
+
require 'mathn'
|
6
|
+
require 'linktermlisc'
|
7
|
+
require 'nkf'
|
8
|
+
|
9
|
+
# Convenience extensions on Array used by the rule-induction code.
class Array
  # Returns a Set built from the elements of this array.
  def to_set
    Set.new self
  end

  # Evaluates the lisc list comprehension below against this array.
  # NOTE(review): presumably yields every ordered pair [x, y] of unequal
  # elements -- confirm against the linktermlisc implementation.
  def parm
    '[ [x,y] | x <- self, y <- self, x != y]'.lisc_do binding
  end

  # Converts the array to a Hash.
  #
  # Accepts either a list of [key, value] pairs or an already flat
  # key, value, key, value, ... list. Only ONE level of nesting is removed,
  # so keys or values that are arrays themselves survive intact (a fully
  # recursive flatten would splice them into the key/value sequence).
  def to_hash
    flat = inject([]) do |items, entry|
      items.concat(entry.is_a?(Array) ? entry : [entry])
    end
    Hash[*flat]
  end
end
|
22
|
+
|
23
|
+
module Enumerable
  # Splits the collection into groups of elements for which the block
  # returns the same value, and returns the groups as an array of arrays
  # (the block values themselves are discarded).
  def partitions
    groups = {}
    each do |element|
      key = yield(element)
      groups[key] = [] unless groups[key]
      groups[key] << element
    end
    groups.values
  end
end
|
29
|
+
|
30
|
+
module MeCab
  # Extensions that let a MeCab node be treated as an Enumerable over
  # itself and every node reachable through #next.
  class Node
    include Enumerable

    # Returns the first comma-separated field of the node's feature string.
    def category
      feature.split(/,/)[0]
    end

    # Calls the block with this node and then with each following node,
    # walking the #next links until one returns nil/false.
    #
    # The walk is iterative: a recursive walk uses one stack frame per node
    # and raises SystemStackError on long node chains.
    def each(&b)
      node = self
      while node
        b[node]
        node = node.next
      end
    end
  end
end
|
44
|
+
|
45
|
+
# Prints progress as "<message> <current>/<max>" lines.
# example:
#   cs = %w(the quick silver)
#   counter = Counter.new(cs.size)
#   cs.each do |c|
#     counter.display!
#     puts c
#   end
class Counter
  # max     - the total shown after the slash.
  # message - optional label; when given it is printed as "[message]: ".
  # output  - object that receives the progress lines via #puts
  #           (defaults to STDOUT).
  def initialize(max, message = nil, output = STDOUT)
    @i = 0
    @max = max
    @message = message.nil? ? '' : "[#{message}]: "
    # Keep the stream: display! writes to it on every call.
    @output = output
  end

  # Increments the internal count and writes one progress line.
  def display!
    @output.puts "#{@message} #{@i += 1}/#{@max}"
  end
end
|
64
|
+
|
65
|
+
#######################################################################
|
66
|
+
module Linkterm
|
67
|
+
# Entry point of the library: loads the documents of a directory and derives
# association rules between the top-scoring nouns of each paragraph.
class Base
  # options[:doc_dir] - directory whose entries are used as documents
  #                     (default: 'docs'). Entries made only of dots
  #                     ("." and "..") are skipped.
  def initialize(options = {})
    @doc_dir = options[:doc_dir] || 'docs'

    # Dir.entries closes the directory handle, unlike a bare Dir.open.
    entries = Dir.entries(@doc_dir).reject {|f| /^\.+$/ =~ f }
    @documents = Documents.new entries.map {|f| "#{@doc_dir}/" + f }
  end

  # Returns log(number of documents / number of documents containing word).
  #
  # The per-word document counts are computed on the first call (printing
  # progress through Counter) and cached in @documents_count.
  # NOTE(review): a word that occurs in no document has a count of 0, so the
  # division fails for it; rule_table only passes words that were counted.
  def idf(word)
    @documents_count ||= document_frequencies

    Math.log( @documents.filenames.length / @documents_count[word] )
  end

  # Builds the rule table.
  #
  # 1. For every paragraph, keep the (up to) three nouns with the highest
  #    count * idf score, sorted alphabetically.
  # 2. Split each of those rows into groups with Enumerable#partitions
  #    (keyed :eng / :long / :short) and pad every row to at least three
  #    columns.
  # 3. For every ordered pair of distinct columns and every pair of
  #    non-empty values seen in them, compute spt = rows where both values
  #    occur / rows where the first value occurs, and keep the rules whose
  #    spt is at least 1/2.
  #
  # Returns an array of hashes {:a1 => ..., :a2 => ..., :spt => ...};
  # an empty array when no paragraph produced any keyword.
  def rule_table
    table = []
    counter = Counter.new(@documents.filenames.size, 'trans to table')
    @documents.each() do |document|
      counter.display!
      document.each {|paragraph|
        hash = Hash.new 0
        paragraph.each {|node|
          hash[node.surface.downcase] += 1 if noun_keyword?(node, 2)
        }
        # Keep only the entries with the highest TF-IDF score.
        top_terms = hash.sort_by {|term, count| count * idf(term) }.reverse[0...3]

        word_count = top_terms.map {|term, _| term }.sort
        table << word_count unless word_count.empty?
      }
    end

    # Without rows there is nothing to induce (table.first would be nil).
    return [] if table.empty?

    # rule induction
    # NOTE(review): the column order of a row follows the order in which
    # partitions first meets each key, so columns are not guaranteed to
    # carry the same key across rows -- confirm this is intended.
    table = table.map {|line|
      arr = line.partitions {|term|
        if /^[a-z_]+$/ =~ term
          :eng
        elsif term.length > 10
          :long
        else
          :short
        end
      }
      arr << [] until arr.size >= 3
      arr
    }

    column_pairs = (0...table.first.length).to_a.parm
    counter = Counter.new(column_pairs.length, 'induce rule')
    # The lisc expression is evaluated with this binding, so the local names
    # table, c1 and c2 must not be renamed.
    rules = column_pairs.map {|c1, c2|
      counter.display!
      pattern = '[ [x,y] | x <- table.map{|u| u[c1] }.to_set, y <- table.map{|u| u[c2] }.to_set, !x.empty?, !y.empty? ]'.lisc_do binding
      pattern.map {|a1, a2|
        spt = table.select {|u| u[c1] == a1 and u[c2] == a2 }.length / table.select {|u| u[c1] == a1 }.length
        {:a1 => a1, :a2 => a2, :spt => spt}
      }
    # 1/2 is an exact fraction only because the file requires 'mathn'.
    }.flatten.select {|r| r[:spt] >= 1/2 }

    rules
  end

  private

  # True when node is a noun ('名詞') whose surface is longer than
  # min_length and is neither all non-word characters nor all digits.
  def noun_keyword?(node, min_length)
    surface = node.surface
    node.category == '名詞' &&
      surface.length > min_length &&
      !(/^\W+$/ =~ surface) &&
      !(/^\d+$/ =~ surface)
  end

  # Counts, for every keyword, the number of documents it occurs in.
  # The block parameters deliberately do not reuse the name of any method
  # argument: under Ruby 1.8 a block parameter named like an outer local
  # overwrites that local.
  def document_frequencies
    frequencies = Hash.new(0)
    counter = Counter.new(@documents.filenames.length, 'idf')
    @documents.each do |document|
      counter.display!
      seen = Set.new []
      document.each {|paragraph|
        paragraph.each {|node|
          seen << node.surface.downcase if noun_keyword?(node, 1)
        }
      }
      seen.each {|term| frequencies[term] += 1 }
    end
    frequencies
  end
end
|
142
|
+
|
143
|
+
# Enumerable collection of documents backed by a list of file names.
# Files are read when iterated, not when the collection is created.
class Documents
  include Enumerable

  attr_reader :filenames

  # filenames - array of paths, one per document.
  def initialize(filenames)
    @filenames = filenames
  end

  # Yields a Document for each file name selected by range
  # (by default, all of them).
  def each(range = 0...@filenames.size)
    filenames[range].each do |path|
      contents = File.open(path, 'r') {|io| io.read }
      yield Document.new(contents)
    end
  end
end
|
158
|
+
|
159
|
+
# A single document; enumerates its text as Paragraph objects.
class Document
  include Enumerable

  # text - the raw contents of the document.
  def initialize(text)
    @text = text
  end

  # Yields one Paragraph per merged chunk of text.
  #
  # Tags (<...>) and the literal "nbsp" are replaced by a space, the text is
  # split on the blank-line pattern, and every chunk of at most 150
  # characters is appended (after a space) to the chunk before it; longer
  # chunks start a new entry.
  def each
    cleaned = @text.gsub(/<.*?>|nbsp/, ' ')
    chunks = cleaned.split(/(\r?\n\s*){2}/)

    # Join paragraphs that are too short onto the preceding one.
    merged = ['']
    chunks.each do |chunk|
      if chunk.size > 150
        merged << chunk
      else
        merged.last << ' ' << chunk
      end
    end

    merged.each do |body|
      yield Paragraph.new(body)
    end
  end
end
|
179
|
+
|
180
|
+
# A paragraph of text; enumerates the MeCab nodes of its analysis.
class Paragraph
  include Enumerable

  # text - the paragraph contents.
  def initialize(text)
    @text = text
  end

  # Converts the text with NKF ('-w'), parses it with a MeCab tagger created
  # with "-Ochasen", and yields every node of the result. A new tagger is
  # created on each call.
  def each
    tagger = MeCab::Tagger.new("-Ochasen")
    converted = NKF.nkf('-w', @text)
    first_node = tagger.parseToNode(converted)
    first_node.each do |node|
      yield node
    end
  end
end
|
194
|
+
end
|
195
|
+
|
196
|
+
|
197
|
+
# CUI: build the rule table from the default document directory, print it,
# then read keywords from standard input and print every rule in which some
# term on either side starts with the keyword. The loop ends at end of input.
linkterm = Linkterm::Base.new
rule_table = linkterm.rule_table
pp rule_table
puts "input?"
# gets returns nil at end of input; stop instead of calling a method on nil.
while (line = gets)
  # chomp only strips the line terminator (chop would also eat the last
  # character of a final line that has no newline).
  str = NKF.nkf('-w', line.chomp)
  # The keyword is matched literally: escaping keeps input such as "c++" or
  # "(" from being interpreted as (possibly invalid) regexp syntax.
  prefix = /^#{Regexp.escape(str)}/
  rule_table.select {|record|
    record[:a1].any? {|r| prefix =~ r } ||
      record[:a2].any? {|r| prefix =~ r }
  }.each {|record|
    puts "[#{record[:a1].join(', ')}]\t\t-> [#{record[:a2].join(', ')}]"
  }
end
|
211
|
+
|
212
|
+
|
data/scripts/txt2html
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'redcloth'
|
5
|
+
require 'syntax/convertors/html'
|
6
|
+
require 'erb'
|
7
|
+
require File.dirname(__FILE__) + '/../lib/linkterm/version.rb'
|
8
|
+
|
9
|
+
version = Linkterm::VERSION::STRING
|
10
|
+
download = 'http://rubyforge.org/projects/linkterm'
|
11
|
+
|
12
|
+
# Defined on Integer (the superclass of Fixnum where Fixnum exists) so that
# the method is available on every integer and on Ruby versions without a
# separate Fixnum class.
class Integer
  # Returns the English ordinal suffix for the number:
  # 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens (10..19 in the last two digits) always take 'th'
    return 'th' if (10..19).include?(self % 100)

    # others depend on the last digit; "when ... then" replaces the
    # "when ...:" form, which Ruby 1.9 and later reject.
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
|
25
|
+
|
26
|
+
class Time
  # Formats the time as "<day><ordinal suffix> <month name> <year>",
  # using the ordinal method of the day number for the suffix.
  def pretty
    day = mday
    day_with_suffix = "#{day}#{day.ordinal}"
    month_name = strftime('%B')
    "#{day_with_suffix} #{month_name} #{year}"
  end
end
|
31
|
+
|
32
|
+
# Converts source to HTML with the Syntax convertor for the given syntax
# name and removes a "<pre>" at the start of a line and a "</pre>" at the end
# of a line from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  html = convertor.convert(source)
  html.gsub(%r!^<pre>|</pre>$!,'')
end
|
35
|
+
|
36
|
+
# Usage: txt2html source.txt [template.rhtml] > output.html
#
# The first line of the source is the page title and the rest is the body,
# both rendered with RedCloth. <pre>/<code> elements carrying a syntax="..."
# attribute are highlighted with convert_syntax. The result is rendered
# through the ERB template, which sees the local variables of this script
# through binding, so their names must stay as they are.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'
else
  # stdout is normally redirected into the generated page, so the usage text
  # goes to stderr; a plain exit (unlike exit!) does not skip the normal
  # shutdown of the interpreter.
  $stderr.puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit 1
end

# File.read closes the template file after reading it.
template = ERB.new(File.read(template))

title = nil
body = nil
File.open(src) do |fsrc|
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Replace every highlighted element by a "syntax-temp-N" placeholder so
  # that RedCloth does not touch the generated markup. \1 makes the closing
  # tag match the opening one (pre or code).
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted markup back; (\d+) captures the placeholder number.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)
|