unihan_lang 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: bc3bac523b20f37850e5d1e43587a736cb1922bb8fc0ea01a39b3c534d257045
4
+ data.tar.gz: f29a6426a0f23ac69d4865a0988586b628c7dd10869adafff412b0f4daabfafb
5
+ SHA512:
6
+ metadata.gz: 490b594addd8d6517bbbba34da1c3c7528c12e8a5a914101ccd5c0e71ce8fe0f406e282bf29ab0281dc8317ee7d240a58ccdfc3928911d009e5859147474081c
7
+ data.tar.gz: 13b72769f0e4e10f1e02c56659f56a39aa0f205a82312eeb14be0f431f36e5b8794c312d98306eeea063c56d5f1c711abc829539c83256d7772556eb1fb54d3b
@@ -0,0 +1,30 @@
1
+ name: Ruby
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - "**"
7
+
8
+ pull_request:
9
+
10
+ jobs:
11
+ build:
12
+ runs-on: ubuntu-latest
13
+ name: Ruby ${{ matrix.ruby }}
14
+ strategy:
15
+ matrix:
16
+ ruby:
17
+ - '3.3.0'
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ - name: Set up Ruby
22
+ uses: ruby/setup-ruby@v1
23
+ with:
24
+ ruby-version: ${{ matrix.ruby }}
25
+ bundler-cache: true
26
+ # FIXME: enable after #1
27
+ #- name: Rubocop
28
+ # run: bundle exec rubocop
29
+ - name: RSpec
30
+ run: bundle exec rspec
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.bin/
3
+ /vendor/
4
+ *.gem
5
+ .ruby-version
6
+ .ruby-gemset
7
+ /log/
8
+ /tmp/
9
+ .DS_Store
10
+ .rubocop-http*
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --require spec_helper
2
+ --format documentation
data/.rubocop.yml ADDED
@@ -0,0 +1,102 @@
1
+ # Normally, additional AllCops/Include and AllCops/Exclude settings override RuboCop's defaults.
2
+ # Here we want both RuboCop's defaults and the additional AllCops/Include and AllCops/Exclude settings to apply, so merge them.
3
+ # ref: https://docs.rubocop.org/en/stable/configuration/#unusual-files-that-would-not-be-included-by-default
4
+ inherit_mode:
5
+ merge:
6
+ - Include
7
+ - Exclude
8
+
9
+ AllCops:
10
+ TargetRubyVersion: 3.3
11
+ Exclude:
12
+ - 'Gemfile'
13
+ - 'bin/*'
14
+ - 'config/**/*'
15
+ - 'db/**/*'
16
+ - 'deploy/**/*'
17
+
18
+ # Accept single-line methods with no body
19
+ Style/SingleLineMethods:
20
+ AllowIfMethodIsEmpty: true
21
+
22
+ # Top-level documentation of classes and modules is unnecessary
23
+ Style/Documentation:
24
+ Enabled: false
25
+
26
+ # Allow chaining a block onto another block that spans multiple lines
27
+ Style/MultilineBlockChain:
28
+ Enabled: false
29
+
30
+ # Allow `->` literal for multi line blocks
31
+ Style/Lambda:
32
+ Enabled: false
33
+
34
+ # Both nested and compact are okay
35
+ Style/ClassAndModuleChildren:
36
+ Enabled: false
37
+
38
+ # Specifying param names is unnecessary
39
+ Style/SingleLineBlockParams:
40
+ Enabled: false
41
+
42
+ # Prefer Kernel#sprintf
43
+ Style/FormatString:
44
+ EnforcedStyle: sprintf
45
+
46
+ # Maximum method length
47
+ Metrics/MethodLength:
48
+ Max: 20
49
+
50
+ # Tune to MethodLength
51
+ Metrics/AbcSize:
52
+ Max: 30
53
+
54
+ # Tune to MethodLength
55
+ Metrics/ClassLength:
56
+ Max: 200
57
+
58
+ # Maximum line length
59
+ Layout/LineLength:
60
+ Max: 100
61
+
62
+ # Allow `has_` as prefix of predicate methods
63
+ Naming/PredicateName:
64
+ ForbiddenPrefixes:
65
+ - is_
66
+ - have_
67
+
68
+ # Prefer double_quotes strings unless your string literal contains escape chars
69
+ Style/StringLiterals:
70
+ EnforcedStyle: double_quotes
71
+
72
+ # Prefer raise over fail for exceptions
73
+ Style/SignalException:
74
+ EnforcedStyle: only_raise
75
+
76
+ # Allow empty condition in case statements
77
+ Style/EmptyCaseCondition:
78
+ Enabled: false
79
+
80
+ # Prefer trailing comma in argument lists
81
+ Style/TrailingCommaInArguments:
82
+ EnforcedStyleForMultiline: comma
83
+
84
+ # Prefer trailing comma in array literals
85
+ Style/TrailingCommaInArrayLiteral:
86
+ EnforcedStyleForMultiline: comma
87
+
88
+ # Prefer trailing comma in hash literals
89
+ Style/TrailingCommaInHashLiteral:
90
+ EnforcedStyleForMultiline: comma
91
+
92
+ # Prefer parentheses for almost all percent literals
93
+ Style/PercentLiteralDelimiters:
94
+ PreferredDelimiters:
95
+ '%i': '()'
96
+ '%I': '()'
97
+ '%w': '()'
98
+ '%W': '()'
99
+
100
+ # Prefer the verbose `has_key?` / `has_value?` style for Hash methods
101
+ Style/PreferredHashMethods:
102
+ EnforcedStyle: verbose
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
4
+
5
+ gem 'rspec', '~> 3.0'
6
+ gem 'rubocop', require: false
7
+ gem 'rubocop-performance', require: false
8
+ gem 'rubocop-rspec', require: false
data/Gemfile.lock ADDED
@@ -0,0 +1,68 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ unihan_lang (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ ast (2.4.2)
10
+ diff-lcs (1.5.1)
11
+ json (2.7.2)
12
+ language_server-protocol (3.17.0.3)
13
+ parallel (1.26.3)
14
+ parser (3.3.4.2)
15
+ ast (~> 2.4.1)
16
+ racc
17
+ racc (1.8.1)
18
+ rainbow (3.1.1)
19
+ rake (13.2.1)
20
+ regexp_parser (2.9.2)
21
+ rspec (3.13.0)
22
+ rspec-core (~> 3.13.0)
23
+ rspec-expectations (~> 3.13.0)
24
+ rspec-mocks (~> 3.13.0)
25
+ rspec-core (3.13.1)
26
+ rspec-support (~> 3.13.0)
27
+ rspec-expectations (3.13.2)
28
+ diff-lcs (>= 1.2.0, < 2.0)
29
+ rspec-support (~> 3.13.0)
30
+ rspec-mocks (3.13.1)
31
+ diff-lcs (>= 1.2.0, < 2.0)
32
+ rspec-support (~> 3.13.0)
33
+ rspec-support (3.13.1)
34
+ rubocop (1.66.0)
35
+ json (~> 2.3)
36
+ language_server-protocol (>= 3.17.0)
37
+ parallel (~> 1.10)
38
+ parser (>= 3.3.0.2)
39
+ rainbow (>= 2.2.2, < 4.0)
40
+ regexp_parser (>= 2.4, < 3.0)
41
+ rubocop-ast (>= 1.32.1, < 2.0)
42
+ ruby-progressbar (~> 1.7)
43
+ unicode-display_width (>= 2.4.0, < 3.0)
44
+ rubocop-ast (1.32.2)
45
+ parser (>= 3.3.1.0)
46
+ rubocop-performance (1.21.1)
47
+ rubocop (>= 1.48.1, < 2.0)
48
+ rubocop-ast (>= 1.31.1, < 2.0)
49
+ rubocop-rspec (3.0.4)
50
+ rubocop (~> 1.61)
51
+ ruby-progressbar (1.13.0)
52
+ unicode-display_width (2.5.0)
53
+
54
+ PLATFORMS
55
+ arm64-darwin-23
56
+ ruby
57
+
58
+ DEPENDENCIES
59
+ bundler (~> 2.0)
60
+ rake (~> 13.0)
61
+ rspec (~> 3.0)
62
+ rubocop
63
+ rubocop-performance
64
+ rubocop-rspec
65
+ unihan_lang!
66
+
67
+ BUNDLED WITH
68
+ 2.5.4
data/README.md ADDED
@@ -0,0 +1,57 @@
1
+ <!-- @format -->
2
+
3
+ # UnihanLang
4
+
5
+ UnihanLang は、テキストの言語(日本語、繁体字中国語、簡体字中国語)を識別するための Ruby ライブラリです。
6
+
7
+ ## インストール
8
+
9
+ Gemfile に以下の行を追加してください:
10
+
11
+ ```ruby
12
+ gem 'unihan_lang'
13
+ ```
14
+
15
+ そして、以下のコマンドを実行してください:
16
+
17
+ ```sh
18
+ bundle install
19
+ ```
20
+
21
+ または、直接インストールする場合は以下のコマンドを使用してください:
22
+
23
+ ```sh
24
+ gem install unihan_lang
25
+ ```
26
+
27
+ ## 使用方法
28
+
29
+ ```ruby
30
+ require 'unihan_lang'
31
+
32
+ unihan = UnihanLang::Unihan.new
33
+
34
+ # 言語の判定
35
+ puts unihan.determine_language("這是繁體中文") # => "ZH_TW"
36
+ puts unihan.determine_language("这是简体中文") # => "ZH_CN"
37
+
38
+ # 繁体字中国語かどうかの判定
39
+ puts unihan.zh_tw?("這是繁體中文") # => true
40
+ puts unihan.zh_tw?("这不是繁体中文") # => false
41
+
42
+ # 簡体字中国語かどうかの判定
43
+ puts unihan.zh_cn?("这是简体中文") # => true
44
+ puts unihan.zh_cn?("這不是簡體中文") # => false
45
+
46
+ # テキストに中国語の文字が含まれているかの判定
47
+ puts unihan.contains_chinese?("This text contains 中文") # => true
48
+ puts unihan.contains_chinese?("This text has no Chinese") # => false
49
+
50
+ # テキストから中国語の文字を抽出
51
+ puts unihan.extract_chinese_characters("This text contains 中文").join # => "中文"
52
+ ```
53
+
54
+ ## 注意事項
55
+
56
+ このライブラリは、テキストの言語を完全に正確に判定することを保証するものではありません。
57
+ 特に、短いテキストや複数の言語が混在するテキストの場合、判定が難しい場合があります。
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require "rubocop/rake_task"
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i(spec rubocop)