linkterm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt ADDED
@@ -0,0 +1,4 @@
1
+ == 0.0.1 2007-07-08
2
+
3
+ * 1 major enhancement:
4
+ * Initial release
data/License.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2007 Linkterm
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt ADDED
@@ -0,0 +1,16 @@
1
+ History.txt
2
+ License.txt
3
+ Manifest.txt
4
+ README.txt
5
+ Rakefile
6
+ lib/linkterm.rb
7
+ lib/linkterm/version.rb
8
+ scripts/txt2html
9
+ setup.rb
10
+ test/test_helper.rb
11
+ test/test_linkterm.rb
12
+ website/index.html
13
+ website/index.txt
14
+ website/javascripts/rounded_corners_lite.inc.js
15
+ website/stylesheets/screen.css
16
+ website/template.rhtml
data/README.txt ADDED
@@ -0,0 +1,24 @@
1
+ README for linkterm
2
+ ===================
3
+
4
+ == INSTALL
5
+ ruby-mecabを使っているため、以下のものを事前にインストールする必要があります。
6
+
7
+ * mecab
8
+ * rb-mecab
9
+
10
+ これらのインストールが完了すると、Rubyで
11
+ require 'MeCab'できるようになります。
12
+
13
+ OS Xでのインストール方法は
14
+ http://ujihisa.nowa.jp/entry/0da2db2747
15
+ にあります。
16
+
17
+ == How to use
18
+ コマンドラインインタフェースを用意しています。
19
+ example/example.rbを実行して、適当なキーワードを入力してください。
20
+
21
+ example.rbはexample/docs以下のファイルから知識を得、
22
+ その知識に基づいて入力されたキーワードに対応する
23
+ ルールを出力します。
24
+
data/Rakefile ADDED
@@ -0,0 +1,123 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/clean'
4
+ require 'rake/testtask'
5
+ require 'rake/packagetask'
6
+ require 'rake/gempackagetask'
7
+ require 'rake/rdoctask'
8
+ require 'rake/contrib/rubyforgepublisher'
9
+ require 'fileutils'
10
+ require 'hoe'
11
+
12
+ include FileUtils
13
+ require File.join(File.dirname(__FILE__), 'lib', 'linkterm', 'version')
14
+
15
+ AUTHOR = 'Linkterm' # can also be an array of Authors
16
+ EMAIL = "ujihisa gmail com"
17
+ DESCRIPTION = "description of gem"
18
+ GEM_NAME = 'linkterm' # what ppl will type to install your gem
19
+
20
+ @config_file = "~/.rubyforge/user-config.yml"
21
+ @config = nil
22
# Returns the RubyForge user name stored in the RubyForge config file.
#
# The YAML file named by @config_file is loaded on first use and cached in
# @config; the user name itself is memoized in @rubyforge_username.
#
# If the config cannot be loaded for any reason, an explanatory message is
# printed and the process exits with a non-zero status so that calling
# scripts can tell the task failed.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue
      puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
 - See http://newgem.rubyforge.org/rubyforge.html for more details
      EOS
      exit 1
    end
  end
  @rubyforge_username ||= @config["username"]
end
37
+
38
+ RUBYFORGE_PROJECT = 'linkterm' # The unix name for your project
39
+ HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
40
+ DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"
41
+
42
+ NAME = "linkterm"
43
+ REV = nil
44
+ # UNCOMMENT IF REQUIRED:
45
+ # REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil
46
+ VERS = Linkterm::VERSION::STRING + (REV ? ".#{REV}" : "")
47
+ CLEAN.include ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']
48
+ RDOC_OPTS = ['--quiet', '--title', 'linkterm documentation',
49
+ "--opname", "index.html",
50
+ "--line-numbers",
51
+ "--main", "README",
52
+ "--inline-source"]
53
+
54
# Reopens Hoe to filter the dependency list it reports.
class Hoe
  # Returns @extra_deps without the entries that name 'hoe'. An entry may be
  # a bare name or an array whose first element is the name.
  def extra_deps
    @extra_deps.select { |dependency| Array(dependency).first != 'hoe' }
  end
end
59
+
60
+ # Generate all the Rake tasks
61
+ # Run 'rake -T' to see list of generated tasks (from gem root directory)
62
+ hoe = Hoe.new(GEM_NAME, VERS) do |p|
63
+ p.author = AUTHOR
64
+ p.description = DESCRIPTION
65
+ p.email = EMAIL
66
+ p.summary = DESCRIPTION
67
+ p.url = HOMEPATH
68
+ p.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
69
+ p.test_globs = ["test/**/test_*.rb"]
70
+ p.clean_globs |= CLEAN #An array of file patterns to delete on clean.
71
+
72
+ # == Optional
73
+ p.changes = p.paragraphs_of("History.txt", 0..1).join("\n\n")
74
+ #p.extra_deps = [] # An array of rubygem dependencies [name, version], e.g. [ ['active_support', '>= 1.3.1'] ]
75
+ #p.spec_extras = {} # A hash of extra values to set in the gemspec.
76
+ end
77
+
78
+ CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
79
+ PATH = (RUBYFORGE_PROJECT == GEM_NAME) ? RUBYFORGE_PROJECT : "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
80
+ hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
81
+
82
+ desc 'Generate website files'
83
+ task :website_generate do
84
+ Dir['website/**/*.txt'].each do |txt|
85
+ sh %{ ruby scripts/txt2html #{txt} > #{txt.gsub(/txt$/,'html')} }
86
+ end
87
+ end
88
+
89
+ desc 'Upload website files to rubyforge'
90
+ task :website_upload do
91
+ host = "#{rubyforge_username}@rubyforge.org"
92
+ remote_dir = "/var/www/gforge-projects/#{PATH}/"
93
+ local_dir = 'website'
94
+ sh %{rsync -aCv #{local_dir}/ #{host}:#{remote_dir}}
95
+ end
96
+
97
+ desc 'Generate and upload website files'
98
+ task :website => [:website_generate, :website_upload, :publish_docs]
99
+
100
+ desc 'Release the website and new gem version'
101
+ task :deploy => [:check_version, :website, :release] do
102
+ puts "Remember to create SVN tag:"
103
+ puts "svn copy svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/trunk " +
104
+ "svn+ssh://#{rubyforge_username}@rubyforge.org/var/svn/#{PATH}/tags/REL-#{VERS} "
105
+ puts "Suggested comment:"
106
+ puts "Tagging release #{CHANGES}"
107
+ end
108
+
109
+ desc 'Runs tasks website_generate and install_gem as a local deployment of the gem'
110
+ task :local_deploy => [:website_generate, :install_gem]
111
+
112
# Release guard: requires a VERSION=x.y.z environment variable that matches
# VERS. Uses abort (message on stderr, exit status 1) instead of a bare exit
# so that a failed check is not reported to the shell as success.
task :check_version do
  abort 'Must pass a VERSION=x.y.z release version' unless ENV['VERSION']
  unless ENV['VERSION'] == VERS
    abort "Please update your version.rb to match the release version, currently #{VERS}"
  end
end
122
+
123
+
@@ -0,0 +1,9 @@
1
module Linkterm #:nodoc:
  # Version components of the gem and their dotted string form.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # "MAJOR.MINOR.TINY"
    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
data/lib/linkterm.rb ADDED
@@ -0,0 +1,212 @@
1
+ $KCODE = 'u'
2
+ require 'MeCab'
3
+ require 'pp'
4
+ require 'set'
5
+ require 'mathn'
6
+ require 'linktermlisc'
7
+ require 'nkf'
8
+
9
# Convenience extensions on Array used by the rule-induction code.
class Array
  # Returns a Set built from the elements of this array.
  def to_set
    Set.new self
  end

  # Evaluates a list-comprehension string through String#lisc_do (provided
  # by the 'linktermlisc' library, not visible here).
  # NOTE(review): presumably yields every ordered pair [x, y] of elements
  # with x != y -- confirm against the lisc implementation.
  def parm
    '[ [x,y] | x <- self, y <- self, x != y]'.lisc_do binding
  end

  # Converts an array of [key, value] pairs (or a flat key, value, key,
  # value, ... list) into a Hash.
  #
  # Only one level is flattened so that values which are themselves arrays
  # survive intact instead of being spliced into the key/value stream.
  def to_hash
    Hash[*flatten(1)]
  end
end
22
+
23
module Enumerable
  # Groups the elements by the value the block returns for each of them and
  # returns the groups (arrays of elements) without their keys.
  def partitions
    buckets = {}
    each do |element|
      key = yield(element)
      buckets[key] = [] unless buckets[key]
      buckets[key] << element
    end
    buckets.values
  end
end
29
+
30
module MeCab
  # Extends MeCab::Node so that a node can be enumerated together with all
  # the nodes that follow it through #next.
  class Node
    include Enumerable

    # Returns the first comma-separated field of #feature.
    def category
      feature.split(/,/)[0]
    end

    # Yields this node and then each following node reached through #next,
    # stopping when #next returns nil/false.
    #
    # Walks the chain iteratively: the previous recursive version used one
    # stack frame per node and could overflow the stack on long inputs.
    def each
      node = self
      while node
        yield node
        node = node.next
      end
    end
  end
end
44
+
45
# Prints progress ("current/max") while iterating.
# example:
#   cs = %w(the quick silver)
#   counter = Counter.new(cs.size)
#   cs.each do |c|
#     counter.display!
#     puts c
#   end
class Counter
  # max::     total number of steps, shown after the slash
  # message:: optional label, printed as "[label]: " before the count
  # output::  object that receives the progress lines via #puts
  def initialize(max, message = nil, output = STDOUT)
    @i = 0
    @max = max
    @message = message.nil? ? '' : "[#{message}]: "
    # Keep the stream: display! previously referenced an undefined `output`.
    @output = output
  end

  # Advances the counter by one and writes the progress line.
  def display!
    @output.puts "#{@message} #{@i += 1}/#{@max}"
  end
end
64
+
65
+ #######################################################################
66
+ module Linkterm
67
# Learns term-association rules from the documents in a directory.
class Base
  # options[:doc_dir]:: directory holding the documents (default 'docs').
  #                     Every entry except those made only of dots is
  #                     handed to Documents as "<doc_dir>/<entry>".
  def initialize(options = {})
    @doc_dir = options[:doc_dir] || 'docs'

    # Dir.entries closes the directory handle, unlike a bare Dir.open.
    entries = Dir.entries(@doc_dir).reject { |f| /^\.+$/ =~ f }
    @documents = Documents.new entries.map { |f| "#{@doc_dir}/" + f }
  end

  # Returns log(number of documents / number of documents containing word).
  # The per-word document counts are computed on first use and cached.
  # Raises ZeroDivisionError for a word that occurs in no document.
  def idf(word)
    @documents_count ||= document_frequencies

    # Explicit Rational: plain Integer division would truncate unless
    # 'mathn' has globally redefined it.
    Math.log(Rational(@documents.filenames.length, @documents_count[word]))
  end

  # Builds one row of terms per paragraph, then induces rules between the
  # row columns. Returns an array of hashes {:a1, :a2, :spt} whose support
  # :spt (rows with a1 and a2 / rows with a1) is at least 1/2.
  def rule_table
    table = []
    counter = Counter.new(@documents.filenames.size, 'trans to table')
    #@documents.each(0..3) do |document|
    @documents.each do |document|
      counter.display!
      document.each do |paragraph|
        hash = Hash.new 0
        paragraph.each do |node|
          hash[node.surface.downcase] += 1 if noun_term?(node, 2)
        end
        # Keep only the three terms with the highest TF-IDF score.
        top = hash.sort_by { |term, count| count * idf(term) }.reverse[0...3]
        #word_count = word_count.select {|word, count| count * idf(word) > 1.5 }.to_hash

        terms = top.map { |term, _| term }.sort
        table << terms unless terms.empty?
      end
    end

    # Nothing to induce from; table.first would be nil below.
    return [] if table.empty?

    # rule induction
    # NOTE(review): partitions returns groups in order of first appearance,
    # so the same column index may hold different categories in different
    # rows -- confirm whether fixed :eng/:long/:short columns were intended.
    table = table.map { |line|
      arr = line.partitions { |term| (/^[a-z_]+$/ =~ term) ? :eng : (term.length > 10) ? :long : :short }
      arr << [] until arr.size >= 3
      arr
    }

    array = (0...table.first.length).to_a.parm
    counter = Counter.new(array.length, 'induce rule')
    rules = array.map { |c1, c2|
      counter.display!
      # The comprehension string reads table, c1 and c2 through the binding;
      # those local names must not be renamed.
      pattern = '[ [x,y] | x <- table.map{|u| u[c1] }.to_set, y <- table.map{|u| u[c2] }.to_set, !x.empty?, !y.empty? ]'.lisc_do binding
      pattern.map { |a1, a2|
        with_a1 = table.select { |u| u[c1] == a1 }
        with_both = with_a1.select { |u| u[c2] == a2 }
        spt = Rational(with_both.length, with_a1.length)
        {:a1 => a1, :a2 => a2, :spt => spt}
      }
    }.flatten.select { |r| r[:spt] >= Rational(1, 2) }

    rules
  end

  private

  # Counts, for every qualifying noun, the number of documents it occurs in.
  def document_frequencies
    frequencies = Hash.new(0)
    counter = Counter.new(@documents.filenames.length, 'idf')
    @documents.each do |document|
      counter.display!
      seen = Set.new []
      document.each do |paragraph|
        paragraph.each do |node|
          seen << node.surface.downcase if noun_term?(node, 1)
        end
      end
      # Distinct block variable: on Ruby 1.8 a block parameter named like a
      # method argument would overwrite that argument.
      seen.each { |term| frequencies[term] += 1 }
    end
    frequencies
  end

  # True when node is a noun whose surface is longer than min_length and is
  # neither made only of non-word characters nor only of digits.
  def noun_term?(node, min_length)
    surface = node.surface
    node.category == '名詞' &&
      surface.length > min_length &&
      !(/^\W+$/ =~ surface) &&
      !(/^\d+$/ =~ surface)
  end
end
142
+
143
# A list of file names that can be enumerated as Document objects.
class Documents
  include Enumerable

  attr_reader :filenames

  # filenames:: paths of the files to read.
  def initialize(filenames)
    @filenames = filenames
  end

  # Reads each file selected by range (all files by default) and yields a
  # Document built from its contents.
  def each(range = 0...@filenames.size)
    filenames[range].each do |path|
      contents = File.open(path, 'r') { |io| io.read }
      yield Document.new(contents)
    end
  end
end
158
+
159
# A text that can be enumerated paragraph by paragraph.
class Document
  include Enumerable

  # text:: raw document contents (may contain markup).
  def initialize(text)
    @text = text
  end

  # Yields a Paragraph for each paragraph of the text.
  #
  # Anything between '<' and '>' and the literal "nbsp" are replaced by a
  # space, then the text is split where two consecutive line breaks occur.
  # Pieces of 150 characters or fewer are appended (after a space) to the
  # preceding paragraph; longer pieces start a new one. The list starts with
  # an empty paragraph, which is yielded as-is when the first piece is long.
  def each
    # Non-capturing group: with a capturing group String#split also returns
    # the captured separators, which then leaked into the paragraph text.
    pieces = @text.gsub(/<.*?>|nbsp/, ' ').split(/(?:\r?\n\s*){2}/)

    # Merge paragraphs that are too short into the previous one.
    paragraphs = [''.dup]
    pieces.each do |piece|
      if piece.size > 150
        paragraphs << piece
      else
        paragraphs.last << ' ' << piece
      end
    end

    paragraphs.each { |paragraph| yield Paragraph.new(paragraph) }
  end
end
179
+
180
# A paragraph of text that can be enumerated as MeCab nodes.
class Paragraph
  include Enumerable

  # text:: the paragraph contents.
  def initialize(text)
    @text = text
  end

  # Converts the text with NKF ('-w'), parses it with a "-Ochasen" MeCab
  # tagger and yields every node of the parse result.
  def each
    tagger = MeCab::Tagger.new("-Ochasen")
    converted = NKF.nkf('-w', @text)
    parsed = tagger.parseToNode(converted)
    parsed.each do |node|
      yield node
    end
  end
end
194
+ end
195
+
196
+
197
# CUI: builds the rule table, dumps it, then reads keywords from standard
# input and prints every rule in which a term on either side starts with
# the keyword.
linkterm = Linkterm::Base.new
rule_table = linkterm.rule_table
pp rule_table
puts "input?"
# Stop cleanly at end of input; gets returns nil there and the previous
# `gets.chop` raised NoMethodError.
while (line = gets)
  # chomp only strips the line terminator; chop would also eat a real
  # character when the last line has no trailing newline.
  str = NKF.nkf('-w', line.chomp)
  begin
    # The keyword is interpreted as a regular expression, as before; build
    # it once per input instead of once per term.
    prefix = /^#{str}/
  rescue RegexpError => e
    STDERR.puts "invalid pattern: #{e.message}"
    next
  end
  rule_table.select {|record|
    record[:a1].any? {|r| prefix =~ r } ||
    record[:a2].any? {|r| prefix =~ r }
  }.each {|record|
    puts "[#{record[:a1].join(', ')}]\t\t-> [#{record[:a2].join(', ')}]"
  }
end
211
+
212
+
data/scripts/txt2html ADDED
@@ -0,0 +1,67 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ require 'redcloth'
5
+ require 'syntax/convertors/html'
6
+ require 'erb'
7
+ require File.dirname(__FILE__) + '/../lib/linkterm/version.rb'
8
+
9
+ version = Linkterm::VERSION::STRING
10
+ download = 'http://rubyforge.org/projects/linkterm'
11
+
12
# Integer is reopened rather than Fixnum: Fixnum is a subclass of Integer
# on old Rubies and no longer exists on current ones, so this form works
# on both.
class Integer
  # Returns the English ordinal suffix for this number:
  # 'st', 'nd', 'rd' or 'th'.
  def ordinal
    # teens (and 10) always take 'th'
    return 'th' if (10..19).include?(self % 100)
    # others -- `when n then` replaces `when n:`, which is a syntax error
    # from Ruby 1.9 onwards
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
25
+
26
class Time
  # Formats the time as "<day><ordinal suffix> <month name> <year>",
  # using the #ordinal method defined on integers in this script.
  def pretty
    day = mday
    month_name = strftime('%B')
    "#{day}#{day.ordinal} #{month_name} #{year}"
  end
end
31
+
32
# Converts source to HTML with the Syntax convertor selected for syntax and
# removes a "<pre>" at the start of a line and a "</pre>" at the end of a
# line from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  html = convertor.convert(source)
  html.gsub(%r!^<pre>|</pre>$!, '')
end
35
+
36
# Command line: source.txt [template.rhtml]. The template defaults to
# website/template.rhtml next to this script.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.dirname(__FILE__) + '/../website/template.rhtml'

else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the file; File.open(...).read left the handle open.
template = ERB.new(File.read(template))

# The local names below (title, body, created, modified, ...) are exposed to
# the template through `binding`; do not rename them.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line is the title, the rest is the Textile body.
  title_text = fsrc.readline
  body_text = fsrc.read
  syntax_items = []
  # Pull out <pre|code syntax="..."> elements, highlight them, and leave a
  # placeholder so RedCloth does not touch the highlighted HTML. The closing
  # tag must match the opening one, hence the \1 back-reference.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted fragments back; \d+ captures the placeholder index.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)