suika 0.3.1 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -6
- data/lib/suika/tagger.rb +2 -2
- data/lib/suika/version.rb +1 -1
- metadata +32 -19
- data/.coveralls.yml +0 -1
- data/.github/workflows/build.yml +0 -21
- data/.github/workflows/coverage.yml +0 -26
- data/.gitignore +0 -16
- data/.rspec +0 -3
- data/.rubocop.yml +0 -119
- data/CODE_OF_CONDUCT.md +0 -74
- data/Gemfile +0 -13
- data/Rakefile +0 -79
- data/Steepfile +0 -20
- data/bin/console +0 -14
- data/bin/setup +0 -8
- data/suika.gemspec +0 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7d6745438c0a8281903fe9cd22adb6db0b4226b3b8b0677e90ecba65fc287b8c
|
4
|
+
data.tar.gz: 4c512813f3c9b14083fa53c91dc8c95e570344c836c1303147f25c3e3eab35f9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b0f9ce528cb4751399e5976fb744c4eee8300044ca8dbe10f26b90e25ef9e63764ee7b2319ae0ab7dff63ad03449d88f0ccd7df89765db19cbcce7479480a7d
|
7
|
+
data.tar.gz: 828cdce758ac69c326a2d88dd8a72744434e2ac7eca70aa773b37f06a9f92fd3199963cb2b153ec8bfe4edecbfb0478676036a236514c69c6b1f799848a6fdbd
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.3.3
|
2
|
+
|
3
|
+
- Add csv and nkf gems to runtime dependencies for Ruby 3.4.
|
4
|
+
- Refactor codes and configs with RuboCop.
|
5
|
+
|
6
|
+
## 0.3.2
|
7
|
+
|
8
|
+
- Refactor codes and configs with RuboCop.
|
9
|
+
|
1
10
|
## 0.3.1
|
2
11
|
- Fix Tagger's inspect method not to expand instance variables for object creation on irb and pry.
|
3
12
|
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Suika
|
2
2
|
|
3
3
|
[![Build Status](https://github.com/yoshoku/suika/workflows/build/badge.svg)](https://github.com/yoshoku/suika/actions?query=workflow%3Abuild)
|
4
|
-
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/suika/badge.svg?branch=main)](https://coveralls.io/github/yoshoku/suika?branch=main)
|
5
4
|
[![Gem Version](https://badge.fury.io/rb/suika.svg)](https://badge.fury.io/rb/suika)
|
6
5
|
[![BSD 3-Clause License](https://img.shields.io/badge/License-BSD%203--Clause-orange.svg)](https://github.com/yoshoku/suika/blob/main/LICENSE.txt)
|
7
6
|
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://rubydoc.info/gems/suika)
|
@@ -75,7 +74,8 @@ end
|
|
75
74
|
## Contributing
|
76
75
|
|
77
76
|
Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/suika.
|
78
|
-
This project is intended to be a safe, welcoming space for collaboration,
|
77
|
+
This project is intended to be a safe, welcoming space for collaboration,
|
78
|
+
and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
79
79
|
|
80
80
|
## License
|
81
81
|
|
@@ -92,7 +92,3 @@ Suika is created with reference to [the book on morphological analysis](https://
|
|
92
92
|
- [Tomoko Uchida](https://github.com/mocobeta) is the author of [Janome](https://github.com/mocobeta/janome) that is a Japanese morphological analysis engine written in pure Python.
|
93
93
|
Suika is heavily influenced by Janome's idea to include the built-in dictionary and language model.
|
94
94
|
Janome, a morphological analyzer written in scripting language, gives me the courage to develop Suika.
|
95
|
-
|
96
|
-
## Code of Conduct
|
97
|
-
|
98
|
-
Everyone interacting in the Suika project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/suika/blob/main/CODE_OF_CONDUCT.md).
|
data/lib/suika/tagger.rb
CHANGED
@@ -24,7 +24,7 @@ module Suika
|
|
24
24
|
class Tagger
|
25
25
|
# Create a new tagger by loading the built-in binary dictionary.
|
26
26
|
def initialize
|
27
|
-
raise IOError, 'SHA1 digest of dictionary file does not match.' unless
|
27
|
+
raise IOError, 'SHA1 digest of dictionary file does not match.' unless Digest::SHA1.file(DICTIONARY_PATH).to_s == DICTIONARY_KEY
|
28
28
|
|
29
29
|
@sysdic = Marshal.load(Zlib::GzipReader.open(DICTIONARY_PATH, &:read))
|
30
30
|
@trie = DartsClone::DoubleArray.new
|
@@ -34,7 +34,7 @@ module Suika
|
|
34
34
|
# Parse the given sentence.
|
35
35
|
# @param sentence [String] Japanese text to be parsed.
|
36
36
|
# @return [Array<String>]
|
37
|
-
def parse(sentence)
|
37
|
+
def parse(sentence) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
|
38
38
|
lattice = Lattice.new(sentence.length)
|
39
39
|
start = 0
|
40
40
|
terminal = sentence.length
|
data/lib/suika/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,28 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suika
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
|
-
autorequire:
|
9
8
|
bindir: exe
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2024-12-29 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: csv
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 3.1.9
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 3.1.9
|
13
26
|
- !ruby/object:Gem::Dependency
|
14
27
|
name: dartsclone
|
15
28
|
requirement: !ruby/object:Gem::Requirement
|
@@ -24,6 +37,20 @@ dependencies:
|
|
24
37
|
- - ">="
|
25
38
|
- !ruby/object:Gem::Version
|
26
39
|
version: 0.2.0
|
40
|
+
- !ruby/object:Gem::Dependency
|
41
|
+
name: nkf
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 0.1.1
|
47
|
+
type: :runtime
|
48
|
+
prerelease: false
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.1.1
|
27
54
|
description: Suika is a Japanese morphological analyzer written in pure Ruby.
|
28
55
|
email:
|
29
56
|
- yoshoku@outlook.com
|
@@ -31,22 +58,10 @@ executables: []
|
|
31
58
|
extensions: []
|
32
59
|
extra_rdoc_files: []
|
33
60
|
files:
|
34
|
-
- ".coveralls.yml"
|
35
|
-
- ".github/workflows/build.yml"
|
36
|
-
- ".github/workflows/coverage.yml"
|
37
|
-
- ".gitignore"
|
38
|
-
- ".rspec"
|
39
|
-
- ".rubocop.yml"
|
40
61
|
- CHANGELOG.md
|
41
|
-
- CODE_OF_CONDUCT.md
|
42
|
-
- Gemfile
|
43
62
|
- LICENSE.txt
|
44
63
|
- NOTICE.txt
|
45
64
|
- README.md
|
46
|
-
- Rakefile
|
47
|
-
- Steepfile
|
48
|
-
- bin/console
|
49
|
-
- bin/setup
|
50
65
|
- dict/sysdic.gz
|
51
66
|
- lib/suika.rb
|
52
67
|
- lib/suika/char_def.rb
|
@@ -59,7 +74,6 @@ files:
|
|
59
74
|
- sig/suika/lattice.rbs
|
60
75
|
- sig/suika/node.rbs
|
61
76
|
- sig/suika/tagger.rbs
|
62
|
-
- suika.gemspec
|
63
77
|
homepage: https://github.com/yoshoku/suika
|
64
78
|
licenses:
|
65
79
|
- BSD-3-Clause
|
@@ -68,7 +82,7 @@ metadata:
|
|
68
82
|
source_code_uri: https://github.com/yoshoku/suika
|
69
83
|
changelog_uri: https://github.com/yoshoku/suika/blob/main/CHANGELOG.md
|
70
84
|
documentation_uri: https://rubydoc.info/gems/suika
|
71
|
-
|
85
|
+
rubygems_mfa_required: 'true'
|
72
86
|
rdoc_options: []
|
73
87
|
require_paths:
|
74
88
|
- lib
|
@@ -83,8 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
83
97
|
- !ruby/object:Gem::Version
|
84
98
|
version: '0'
|
85
99
|
requirements: []
|
86
|
-
rubygems_version: 3.2
|
87
|
-
signing_key:
|
100
|
+
rubygems_version: 3.6.2
|
88
101
|
specification_version: 4
|
89
102
|
summary: Suika is a Japanese morphological analyzer written in pure Ruby.
|
90
103
|
test_files: []
|
data/.coveralls.yml
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
service_name: github-ci
|
data/.github/workflows/build.yml
DELETED
@@ -1,21 +0,0 @@
|
|
1
|
-
name: build
|
2
|
-
|
3
|
-
on: [push, pull_request]
|
4
|
-
|
5
|
-
jobs:
|
6
|
-
build:
|
7
|
-
runs-on: ubuntu-latest
|
8
|
-
strategy:
|
9
|
-
matrix:
|
10
|
-
ruby: [ '2.6', '2.7', '3.0' ]
|
11
|
-
steps:
|
12
|
-
- uses: actions/checkout@v2
|
13
|
-
- name: Set up Ruby ${{ matrix.ruby }}
|
14
|
-
uses: actions/setup-ruby@v1
|
15
|
-
with:
|
16
|
-
ruby-version: ${{ matrix.ruby }}
|
17
|
-
- name: Build and test with Rake
|
18
|
-
run: |
|
19
|
-
gem install --no-document bundler
|
20
|
-
bundle install --jobs 4 --retry 3
|
21
|
-
bundle exec rake
|
data/.github/workflows/coverage.yml
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
name: coverage
|
2
|
-
|
3
|
-
on:
|
4
|
-
push:
|
5
|
-
branches: [ main ]
|
6
|
-
pull_request:
|
7
|
-
branches: [ main ]
|
8
|
-
|
9
|
-
jobs:
|
10
|
-
coverage:
|
11
|
-
runs-on: ubuntu-latest
|
12
|
-
steps:
|
13
|
-
- uses: actions/checkout@v2
|
14
|
-
- name: Set up Ruby 2.7
|
15
|
-
uses: actions/setup-ruby@v1
|
16
|
-
with:
|
17
|
-
ruby-version: '2.7'
|
18
|
-
- name: Build and test with Rake
|
19
|
-
run: |
|
20
|
-
gem install --no-document bundler
|
21
|
-
bundle install --jobs 4 --retry 3
|
22
|
-
bundle exec rake
|
23
|
-
- name: Coveralls GitHub Action
|
24
|
-
uses: coverallsapp/github-action@v1.1.2
|
25
|
-
with:
|
26
|
-
github-token: ${{ secrets.GITHUB_TOKEN }}
|
data/.gitignore
DELETED
data/.rspec
DELETED
data/.rubocop.yml
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
require:
|
2
|
-
- rubocop-performance
|
3
|
-
- rubocop-rspec
|
4
|
-
|
5
|
-
AllCops:
|
6
|
-
NewCops: enable
|
7
|
-
TargetRubyVersion: 2.5
|
8
|
-
DisplayCopNames: true
|
9
|
-
DisplayStyleGuide: true
|
10
|
-
Exclude:
|
11
|
-
- 'bin/*'
|
12
|
-
- 'suika.gemspec'
|
13
|
-
- 'Rakefile'
|
14
|
-
- 'Gemfile'
|
15
|
-
|
16
|
-
Layout/EmptyLineAfterGuardClause:
|
17
|
-
Enabled: false
|
18
|
-
|
19
|
-
Layout/EmptyLinesAroundAttributeAccessor:
|
20
|
-
Enabled: true
|
21
|
-
|
22
|
-
Layout/LineLength:
|
23
|
-
Max: 145
|
24
|
-
IgnoredPatterns: ['(\A|\s)#']
|
25
|
-
|
26
|
-
Layout/SpaceAroundMethodCallOperator:
|
27
|
-
Enabled: true
|
28
|
-
|
29
|
-
Lint/DeprecatedOpenSSLConstant:
|
30
|
-
Enabled: true
|
31
|
-
|
32
|
-
Lint/MixedRegexpCaptureTypes:
|
33
|
-
Enabled: true
|
34
|
-
|
35
|
-
Lint/RaiseException:
|
36
|
-
Enabled: true
|
37
|
-
|
38
|
-
Lint/StructNewOverride:
|
39
|
-
Enabled: true
|
40
|
-
|
41
|
-
Metrics/ModuleLength:
|
42
|
-
Max: 200
|
43
|
-
|
44
|
-
Metrics/ClassLength:
|
45
|
-
Max: 200
|
46
|
-
|
47
|
-
Metrics/MethodLength:
|
48
|
-
Max: 50
|
49
|
-
|
50
|
-
Metrics/AbcSize:
|
51
|
-
Max: 60
|
52
|
-
|
53
|
-
Metrics/CyclomaticComplexity:
|
54
|
-
Max: 16
|
55
|
-
|
56
|
-
Metrics/PerceivedComplexity:
|
57
|
-
Max: 16
|
58
|
-
|
59
|
-
Metrics/BlockLength:
|
60
|
-
Max: 40
|
61
|
-
Exclude:
|
62
|
-
- 'spec/**/*'
|
63
|
-
|
64
|
-
Metrics/ParameterLists:
|
65
|
-
Max: 12
|
66
|
-
|
67
|
-
Naming/MethodParameterName:
|
68
|
-
Enabled: false
|
69
|
-
|
70
|
-
Naming/ConstantName:
|
71
|
-
Enabled: false
|
72
|
-
|
73
|
-
Security/MarshalLoad:
|
74
|
-
Enabled: false
|
75
|
-
|
76
|
-
Style/AsciiComments:
|
77
|
-
Enabled: false
|
78
|
-
|
79
|
-
Style/Documentation:
|
80
|
-
Enabled: false
|
81
|
-
|
82
|
-
Style/ExponentialNotation:
|
83
|
-
Enabled: true
|
84
|
-
|
85
|
-
Style/HashEachMethods:
|
86
|
-
Enabled: true
|
87
|
-
|
88
|
-
Style/HashTransformKeys:
|
89
|
-
Enabled: true
|
90
|
-
|
91
|
-
Style/HashTransformValues:
|
92
|
-
Enabled: true
|
93
|
-
|
94
|
-
Style/RedundantRegexpCharacterClass:
|
95
|
-
Enabled: true
|
96
|
-
|
97
|
-
Style/RedundantRegexpEscape:
|
98
|
-
Enabled: true
|
99
|
-
|
100
|
-
Style/SlicingWithRange:
|
101
|
-
Enabled: true
|
102
|
-
|
103
|
-
Style/FormatStringToken:
|
104
|
-
Enabled: false
|
105
|
-
|
106
|
-
Style/NumericLiterals:
|
107
|
-
Enabled: false
|
108
|
-
|
109
|
-
RSpec/MultipleExpectations:
|
110
|
-
Enabled: false
|
111
|
-
|
112
|
-
RSpec/ExampleLength:
|
113
|
-
Max: 40
|
114
|
-
|
115
|
-
RSpec/InstanceVariable:
|
116
|
-
Enabled: false
|
117
|
-
|
118
|
-
RSpec/LeakyConstantDeclaration:
|
119
|
-
Enabled: false
|
data/CODE_OF_CONDUCT.md
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
# Contributor Covenant Code of Conduct
|
2
|
-
|
3
|
-
## Our Pledge
|
4
|
-
|
5
|
-
In the interest of fostering an open and welcoming environment, we as
|
6
|
-
contributors and maintainers pledge to making participation in our project and
|
7
|
-
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
-
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
-
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
-
orientation.
|
11
|
-
|
12
|
-
## Our Standards
|
13
|
-
|
14
|
-
Examples of behavior that contributes to creating a positive environment
|
15
|
-
include:
|
16
|
-
|
17
|
-
* Using welcoming and inclusive language
|
18
|
-
* Being respectful of differing viewpoints and experiences
|
19
|
-
* Gracefully accepting constructive criticism
|
20
|
-
* Focusing on what is best for the community
|
21
|
-
* Showing empathy towards other community members
|
22
|
-
|
23
|
-
Examples of unacceptable behavior by participants include:
|
24
|
-
|
25
|
-
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
-
advances
|
27
|
-
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
-
* Public or private harassment
|
29
|
-
* Publishing others' private information, such as a physical or electronic
|
30
|
-
address, without explicit permission
|
31
|
-
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
-
professional setting
|
33
|
-
|
34
|
-
## Our Responsibilities
|
35
|
-
|
36
|
-
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
-
behavior and are expected to take appropriate and fair corrective action in
|
38
|
-
response to any instances of unacceptable behavior.
|
39
|
-
|
40
|
-
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
-
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
-
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
-
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
-
threatening, offensive, or harmful.
|
45
|
-
|
46
|
-
## Scope
|
47
|
-
|
48
|
-
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
-
when an individual is representing the project or its community. Examples of
|
50
|
-
representing a project or community include using an official project e-mail
|
51
|
-
address, posting via an official social media account, or acting as an appointed
|
52
|
-
representative at an online or offline event. Representation of a project may be
|
53
|
-
further defined and clarified by project maintainers.
|
54
|
-
|
55
|
-
## Enforcement
|
56
|
-
|
57
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
-
reported by contacting the project team at yoshoku@outlook.com. All
|
59
|
-
complaints will be reviewed and investigated and will result in a response that
|
60
|
-
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
-
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
-
Further details of specific enforcement policies may be posted separately.
|
63
|
-
|
64
|
-
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
-
faith may face temporary or permanent repercussions as determined by other
|
66
|
-
members of the project's leadership.
|
67
|
-
|
68
|
-
## Attribution
|
69
|
-
|
70
|
-
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
-
available at [https://contributor-covenant.org/version/1/4][version]
|
72
|
-
|
73
|
-
[homepage]: https://contributor-covenant.org
|
74
|
-
[version]: https://contributor-covenant.org/version/1/4/
|
data/Gemfile
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
source 'https://rubygems.org'
|
4
|
-
|
5
|
-
# Specify your gem's dependencies in suika.gemspec
|
6
|
-
gemspec
|
7
|
-
|
8
|
-
gem 'rake', '~> 13.0'
|
9
|
-
gem 'rspec', '~> 3.0'
|
10
|
-
gem 'simplecov', '~> 0.21'
|
11
|
-
gem 'simplecov-lcov', '~> 0.8'
|
12
|
-
gem 'rbs', '~> 1.2'
|
13
|
-
gem 'steep', '~> 0.44'
|
data/Rakefile
DELETED
@@ -1,79 +0,0 @@
|
|
1
|
-
require 'bundler/gem_tasks'
|
2
|
-
require 'rspec/core/rake_task'
|
3
|
-
|
4
|
-
require 'csv'
|
5
|
-
require 'dartsclone'
|
6
|
-
require 'nkf'
|
7
|
-
require 'rubygems/package'
|
8
|
-
require 'zlib'
|
9
|
-
|
10
|
-
RSpec::Core::RakeTask.new(:spec)
|
11
|
-
|
12
|
-
task :default => :spec
|
13
|
-
|
14
|
-
desc 'Build suika system dictionary'
|
15
|
-
task :dictionary do
|
16
|
-
base_dir = "#{__dir__}/dict/mecab-ipadic-2.7.0-20070801"
|
17
|
-
unless File.directory?(base_dir)
|
18
|
-
puts "Download mecab-ipadic file and expand that under dict directory: #{__dir__}/dict/mecab-ipadic-2.7.0-20070801"
|
19
|
-
puts
|
20
|
-
puts 'Example:'
|
21
|
-
puts 'wget -O dict/mecab-ipadic.tgz https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7MWVlSDBCSXZMTXM'
|
22
|
-
puts 'cd dict'
|
23
|
-
puts 'tar xzf mecab-ipadic.tgz'
|
24
|
-
puts 'cd ../'
|
25
|
-
next # exit
|
26
|
-
end
|
27
|
-
|
28
|
-
File.open("#{__dir__}/dict/mecab-ipadic-2.7.0-20070801/Reiwa.csv", 'w') do |f|
|
29
|
-
f.puts('令和,1288,1288,5904,名詞,固有名詞,一般,*,*,*,令和,レイワ,レイワ')
|
30
|
-
end
|
31
|
-
|
32
|
-
unknowns = {}
|
33
|
-
File.open("#{base_dir}/unk.def") do |f|
|
34
|
-
f.each_line do |line|
|
35
|
-
row = NKF.nkf('-w', line.chomp).split(',')
|
36
|
-
unknowns[row[0]] ||= []
|
37
|
-
unknowns[row[0]] << [row[1].to_i, row[2].to_i, row[3].to_i, *row[4..-1]]
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
41
|
-
dict = {}
|
42
|
-
Dir.glob("#{base_dir}/*.csv").each do |filename|
|
43
|
-
File.open(filename) do |f|
|
44
|
-
f.each_line do |line|
|
45
|
-
row = NKF.nkf('-w', line.chomp).split(',')
|
46
|
-
dict[row[0]] ||= []
|
47
|
-
dict[row[0]] << [row[1].to_i, row[2].to_i, row[3].to_i, *row[4..-1]]
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
da = DartsClone::DoubleArray.new
|
53
|
-
words = dict.keys.sort
|
54
|
-
da.build(words)
|
55
|
-
features = words.map { |w| dict[w] }
|
56
|
-
|
57
|
-
concosts = nil
|
58
|
-
File.open("#{base_dir}/matrix.def") do |f|
|
59
|
-
n_entries = f.readline.chomp.split.map(&:to_i).first
|
60
|
-
concosts = Array.new(n_entries) { Array.new(n_entries) }
|
61
|
-
f.each_line do |line|
|
62
|
-
row, col, cost = line.chomp.split.map(&:to_i)
|
63
|
-
concosts[row][col] = cost
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
ipadic = {
|
68
|
-
trie: da.get_array,
|
69
|
-
features: features,
|
70
|
-
unknowns: unknowns,
|
71
|
-
concosts: concosts
|
72
|
-
}
|
73
|
-
|
74
|
-
Zlib::GzipWriter.open("#{__dir__}/dict/sysdic.gz", Zlib::BEST_SPEED) { |f| f.write(Marshal.dump(ipadic)) }
|
75
|
-
|
76
|
-
puts 'The system dictionary has been successfully built:'
|
77
|
-
puts "#{__dir__}/dict/sysdic.gz"
|
78
|
-
puts Digest::SHA1.file("#{__dir__}/dict/sysdic.gz").to_s
|
79
|
-
end
|
data/Steepfile
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
target :lib do
|
2
|
-
signature "sig"
|
3
|
-
#
|
4
|
-
check "lib" # Directory name
|
5
|
-
# check "Gemfile" # File name
|
6
|
-
# check "app/models/**/*.rb" # Glob
|
7
|
-
# # ignore "lib/templates/*.rb"
|
8
|
-
#
|
9
|
-
# # library "pathname", "set" # Standard libraries
|
10
|
-
library "dartsclone" # Gems
|
11
|
-
end
|
12
|
-
|
13
|
-
# target :spec do
|
14
|
-
# signature "sig", "sig-private"
|
15
|
-
#
|
16
|
-
# check "spec"
|
17
|
-
#
|
18
|
-
# # library "pathname", "set" # Standard libraries
|
19
|
-
# # library "rspec"
|
20
|
-
# end
|
data/bin/console
DELETED
@@ -1,14 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require "bundler/setup"
|
4
|
-
require "suika"
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require "pry"
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require "irb"
|
14
|
-
IRB.start(__FILE__)
|
data/bin/setup
DELETED
data/suika.gemspec
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'lib/suika/version'
|
4
|
-
|
5
|
-
Gem::Specification.new do |spec|
|
6
|
-
spec.name = 'suika'
|
7
|
-
spec.version = Suika::VERSION
|
8
|
-
spec.authors = ['yoshoku']
|
9
|
-
spec.email = ['yoshoku@outlook.com']
|
10
|
-
|
11
|
-
spec.summary = 'Suika is a Japanese morphological analyzer written in pure Ruby.'
|
12
|
-
spec.description = 'Suika is a Japanese morphological analyzer written in pure Ruby.'
|
13
|
-
spec.homepage = 'https://github.com/yoshoku/suika'
|
14
|
-
spec.license = 'BSD-3-Clause'
|
15
|
-
|
16
|
-
spec.metadata['homepage_uri'] = spec.homepage
|
17
|
-
spec.metadata['source_code_uri'] = spec.homepage
|
18
|
-
spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/suika/blob/main/CHANGELOG.md'
|
19
|
-
spec.metadata['documentation_uri'] = 'https://rubydoc.info/gems/suika'
|
20
|
-
|
21
|
-
# Specify which files should be added to the gem when it is released.
|
22
|
-
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
23
|
-
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
24
|
-
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
-
end
|
26
|
-
spec.bindir = 'exe'
|
27
|
-
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
|
-
spec.require_paths = ['lib']
|
29
|
-
|
30
|
-
spec.add_runtime_dependency 'dartsclone', '>= 0.2.0'
|
31
|
-
end
|