mecab-heavy 0.996.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/examples/test.rb +57 -0
- data/ext/mecab/extconf.rb +50 -0
- data/ext/mecab/mecab_wrap.cpp +6840 -0
- data/ext/mecab/parallel_make.rb +39 -0
- data/ext/mecab/patch/prefix.patch +18 -0
- metadata +96 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: e69b362e2214752e4967dcec97c1d7cbf5bd73f7
|
|
4
|
+
data.tar.gz: 94ae7c66dbd725729267173db93ee955e07cf1be
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 41b20517634cf31969e47bd6003f32ff2e212a6e9cf3e50002ec72f12f16bfaebcd667973d3b11f255a6c2d67b6237f0dbc780a042302d30a37fd5bdd3ff3ae6
|
|
7
|
+
data.tar.gz: cd8529fd78cd33ce9f52dd4b9f571112b9bb8b7762cdc556c7e1365cc1cf361e5bb56042d195d54207361f4cc0bf595ff03de3448dce2a36b03b2dbd2545e074
|
data/examples/test.rb
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
#!/usr/bin/ruby
# -*- coding: utf-8 -*-

# Example script for the MeCab Ruby binding. It prints, in order:
#   1. the binding version and the parse of a sample sentence,
#   2. every node returned by parseToNode,
#   3. the begin/end node lists of a Lattice at each position,
#   4. up to 11 N-best results,
#   5. the fields of each dictionary_info entry of the model.
# Command-line arguments are joined with spaces and passed to
# MeCab::Model.new as a single string.

require 'mecab'

sentence = "太郎はこの本を二郎を見た女性に渡した。"

print MeCab::VERSION, "\n"
model = MeCab::Model.new(ARGV.join(" "))
tagger = model.createTagger()

puts tagger.parse(sentence)

# Walk the result as a linked list of nodes.
node = tagger.parseToNode(sentence)
while node
  print node.surface, "\t", node.feature, "\t", node.cost, "\n"
  node = node.next
end
print "EOS\n"

# Dump the lattice: for each position, the nodes beginning there and the
# nodes ending there.
lattice = MeCab::Lattice.new()
lattice.set_sentence(sentence)
tagger.parse(lattice)
len = lattice.size()
(0..len).each do |i|
  b = lattice.begin_nodes(i)
  while b
    printf "B[%d] %s\t%s\n", i, b.surface, b.feature
    b = b.bnext
  end

  e = lattice.end_nodes(i)
  while e
    printf "E[%d] %s\t%s\n", i, e.surface, e.feature
    # The end-node list is chained through enext; bnext belongs to the
    # begin-node list and would walk the wrong chain here.
    e = e.enext
  end
end
print "EOS\n"

# N-best output: same iteration count as the original `0..10` range.
lattice.set_sentence(sentence)
lattice.set_request_type(MeCab::MECAB_NBEST)
tagger.parse(lattice)
11.times do
  # Stop once no further result is available instead of printing the
  # previous result again.
  break unless lattice.next()
  print lattice.toString()
end

# Print every dictionary attached to the model.
d = model.dictionary_info()
while d
  printf "filename: %s\n", d.filename
  printf "charset: %s\n", d.charset
  printf "size: %d\n", d.size
  printf "type: %d\n", d.type
  printf "lsize: %d\n", d.lsize
  printf "rsize: %d\n", d.rsize
  printf "version: %d\n", d.version
  d = d.next
end
|
data/ext/mecab/extconf.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
require 'rbconfig'

# Copy CXX and CC from the environment into RbConfig's Makefile settings
# when they are set. This happens before mkmf is required below.
%w[CXX CC].each do |tool|
  override = ENV[tool]
  RbConfig::MAKEFILE_CONFIG[tool] = override if override
end

require 'mkmf'
require 'mini_portile'

require_relative 'parallel_make'

# Look up the `make` executable via mkmf; the result is not used here.
find_executable('make')
|
|
11
|
+
|
|
12
|
+
# Sets up a MiniPortile recipe for one bundled dependency, then runs its
# `cook` and `activate` steps.
#
# @param name [String] recipe name passed to MiniPortile.new
# @param version [String] recipe version passed to MiniPortile.new
# @param url [String] the only entry placed in the recipe's file list
# @param patches [Array<String>] patch paths, expanded relative to the
#   directory containing this file; skipped when empty
# @return the value returned by the recipe's `activate` call
def cook_internal(name, version, url, patches = [])
  portile = MiniPortile.new(name, version)
  portile.files = [url]

  # NOTE(review): %w splits on whitespace, so each quoted compiler setting
  # below becomes two array elements ("CXX='g++" and "-fPIC'"). Presumably
  # this relies on the options being re-joined into one shell string --
  # confirm against the MiniPortile version in use.
  portile.configure_options += %w[--with-charset=utf8 --disable-shared --enable-static CXX='g++ -fPIC' CC='gcc -fPIC']

  unless patches.empty?
    absolute_patches = patches.map { |relative| File.expand_path(relative, __dir__) }
    portile.patch_files += absolute_patches
  end

  portile.cook
  portile.activate
end
|
|
20
|
+
|
|
21
|
+
# Runs cook_internal for MeCab 0.996, with no patches.
def cook_mecab
  version = '0.996'
  # NOTE(review): Google Code hosting has been retired; confirm this
  # download URL still resolves.
  tarball = 'https://mecab.googlecode.com/files/mecab-0.996.tar.gz'
  cook_internal('mecab', version, tarball)
end
|
|
24
|
+
|
|
25
|
+
# Runs cook_internal for the NAIST-jdic dictionary, applying
# patch/prefix.patch.
def cook_naist_jdic
  version = '0.6.3b-20111013'
  tarball = 'http://jaist.dl.sourceforge.jp/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz'
  patch_list = ['patch/prefix.patch']
  cook_internal('mecab-naist-jdic', version, tarball, patch_list)
end
|
|
30
|
+
|
|
31
|
+
# Run the two bundled recipes (MeCab, then the NAIST-jdic dictionary)
# before anything below queries mecab-config.
cook_mecab
cook_naist_jdic

# mecab-config command to query; `with_config` allows it to be overridden
# from the extconf command line, defaulting to 'mecab-config'.
mecab_config = with_config('mecab-config', 'mecab-config')

# Runs `<mecab_config> --<opt>` and returns its output with surrounding
# whitespace removed.
get = ->(opt) { `#{mecab_config} --#{opt}`.strip }

$CFLAGS << ' ' << get['cflags'] << ' -fPIC'
$LDFLAGS << ' ' << get['libs']
$libs << ' ' << get['libs-only-l'].split.map { |l| "-l#{l}" }.join(' ') # fix broken mecab_config as pkg_config script

# Refuse to continue unless the reported version starts with 0.996.
# The message goes to stderr and the process exits with status 1.
unless get['version'] =~ /^0\.996/
  abort "[ERROR] Mecab 0.996 requires libmecab version 0.996."
end

# Add the include directory of the MeCab 0.996 port under ./ports.
# When the glob matches nothing, skip the flag: a bare "-I" with no path
# would swallow whatever compiler option follows it.
include_dir = Dir.glob("#{Dir.pwd}/ports/*/mecab/0.996/include").first
$CPPFLAGS += " -I#{include_dir}" if include_dir

# Only generate the Makefile when mecab.h can be found.
have_header('mecab.h') && create_makefile('mecab')
|