wordcuta 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (8)
  1. checksums.yaml +4 -4
  2. data/LICENSE +503 -158
  3. data/README.md +11 -1
  4. data/lib/wordcuta.rb +12 -5
  5. data/wordcuta.gemspec +9 -9
  6. metadata +12 -16
  7. data/Gemfile +0 -5
  8. data/data/thai-dix.txt +0 -15890
data/README.md CHANGED
@@ -5,11 +5,21 @@
5
5
  * Install [wordcutw](https://github.com/veer66/wordcutw)
6
6
  * gem install wordcuta
7
7
 
8
+ ## Prerequisite
9
+
10
+ ### Dictionaries and rules
11
+
12
+ ```Shell
13
+ wget https://codeberg.org/mekong-lang/mekong-lang-data/archive/main.tar.gz -O - | tar -xzvf -
14
+ ```
15
+
8
16
  ## Example
9
17
 
10
18
  ```Ruby
11
19
  require 'wordcuta'
12
- wc = WordcutA::Wordcut.new(WordcutA::DEFAULT_THAI_DICT_PATH)
20
+
21
+ wc = WordcutA::Wordcut.new("mekong-lang-data/dictionaries/mixed/chamkho-dict.txt",
22
+ "mekong-lang-data/cluster-rules/thai_cluster_rules.txt")
13
23
  p wc.put_delimiters('กากาก้า', '|')
14
24
  p wc.into_strings('กากา')
15
25
  p wc.into_ranges('กากา')
data/lib/wordcuta.rb CHANGED
@@ -10,8 +10,9 @@ module WordcutFFI
10
10
  extend FFI::Library
11
11
 
12
12
  ffi_lib "wordcutw"
13
-
13
+
14
14
  attach_function :wordcut_new_with_dict, [:string], :pointer
15
+ attach_function :wordcut_new_with_dict_and_cluster_rules, [:string, :string], :pointer
15
16
  attach_function :wordcut_into_text_ranges, [:pointer, :string, :pointer], :pointer
16
17
  attach_function :wordcut_into_strings, [:pointer, :string, :pointer], :pointer
17
18
  attach_function :wordcut_put_delimiters, [:pointer, :string, :string], :string
@@ -23,11 +24,17 @@ module WordcutA
23
24
  TextRange = Struct.new(:s, :e)
24
25
 
25
26
  DEFAULT_THAI_DICT_PATH = File.expand_path('../../data/thai-dix.txt', __FILE__)
26
-
27
+
27
28
  class Wordcut
28
- def initialize(dict_path)
29
- @wordcut_p = FFI::AutoPointer.new(WordcutFFI.wordcut_new_with_dict(dict_path),
30
- WordcutFFI.method(:delete_wordcut))
29
+ def initialize(dict_path, cluster_rule_path = nil)
30
+ if cluster_rule_path
31
+ @wordcut_p = FFI::AutoPointer.new(
32
+ WordcutFFI.wordcut_new_with_dict_and_cluster_rules(dict_path, cluster_rule_path),
33
+ WordcutFFI.method(:delete_wordcut))
34
+ else
35
+ @wordcut_p = FFI::AutoPointer.new(WordcutFFI.wordcut_new_with_dict(dict_path),
36
+ WordcutFFI.method(:delete_wordcut))
37
+ end
31
38
  end
32
39
 
33
40
  def into_ranges(text)
data/wordcuta.gemspec CHANGED
@@ -1,13 +1,13 @@
1
1
  Gem::Specification.new do |s|
2
- s.name = 'wordcuta'
3
- s.version = '0.2.1'
4
- s.authors = ['Vee Satayamas']
5
- s.email = ['5ssgdxltv@relay.firefox.com']
6
- s.licenses = ['LGPL-3.0']
7
- s.description = "A word segmentation tools for ASEAN languages wrapper for Ruby"
8
- s.homepage = "https://github.com/veer66/wordcuta"
2
+ s.name = "wordcuta"
3
+ s.version = "0.4.0"
4
+ s.authors = ["Vee Satayamas"]
5
+ s.email = ["vsatayamas@gmail.com"]
6
+ s.licenses = ["BSD-2-Clause"]
7
+ s.description = "A Ruby wrapper of wordcut-engine - a word segmentation tools for ASEAN languages, i.e. Khmer, Lao, Myanmar, and Thai"
8
+ s.homepage = "https://codeberg.org/mekong/wordcuta"
9
9
  s.required_ruby_version = Gem::Requirement.new(">= 2.3.0")
10
10
  s.summary = "A word segmentation tools for ASEAN languages wrapper for Ruby"
11
- s.files = %w(README.md LICENSE Gemfile wordcuta.gemspec data/thai-dix.txt lib/wordcuta.rb)
12
- s.add_dependency 'ffi', '1.15.5'
11
+ s.files = %w(README.md LICENSE wordcuta.gemspec lib/wordcuta.rb)
12
+ s.add_dependency "ffi", "~> 1.15"
13
13
  end
metadata CHANGED
@@ -1,47 +1,44 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wordcuta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Vee Satayamas
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2022-02-16 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: ffi
15
14
  requirement: !ruby/object:Gem::Requirement
16
15
  requirements:
17
- - - '='
16
+ - - "~>"
18
17
  - !ruby/object:Gem::Version
19
- version: 1.15.5
18
+ version: '1.15'
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
- - - '='
23
+ - - "~>"
25
24
  - !ruby/object:Gem::Version
26
- version: 1.15.5
27
- description: A word segmentation tools for ASEAN languages wrapper for Ruby
25
+ version: '1.15'
26
+ description: A Ruby wrapper of wordcut-engine - a word segmentation tools for ASEAN
27
+ languages, i.e. Khmer, Lao, Myanmar, and Thai
28
28
  email:
29
- - 5ssgdxltv@relay.firefox.com
29
+ - vsatayamas@gmail.com
30
30
  executables: []
31
31
  extensions: []
32
32
  extra_rdoc_files: []
33
33
  files:
34
- - Gemfile
35
34
  - LICENSE
36
35
  - README.md
37
- - data/thai-dix.txt
38
36
  - lib/wordcuta.rb
39
37
  - wordcuta.gemspec
40
- homepage: https://github.com/veer66/wordcuta
38
+ homepage: https://codeberg.org/mekong/wordcuta
41
39
  licenses:
42
- - LGPL-3.0
40
+ - BSD-2-Clause
43
41
  metadata: {}
44
- post_install_message:
45
42
  rdoc_options: []
46
43
  require_paths:
47
44
  - lib
@@ -56,8 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
56
53
  - !ruby/object:Gem::Version
57
54
  version: '0'
58
55
  requirements: []
59
- rubygems_version: 3.2.22
60
- signing_key:
56
+ rubygems_version: 3.7.1
61
57
  specification_version: 4
62
58
  summary: A word segmentation tools for ASEAN languages wrapper for Ruby
63
59
  test_files: []
data/Gemfile DELETED
@@ -1,5 +0,0 @@
1
- source :rubygems
2
-
3
- gemspec
4
-
5
- gem 'ffi'