cld3 3.4.3 → 3.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2c161cbf12d260074efd2e9db3981b6615af20ee04c234d6b2710bd52a283a4e
4
- data.tar.gz: c388ae6b529d95e015ecdb7d21cdd7f1ceaca72d167d0f8008b5477d5bce5b3c
3
+ metadata.gz: f40e4947fea97543686caceba0082bdba30b5ae0485a25b41004ad048057b0ad
4
+ data.tar.gz: e45c60300550caf513fdde6bcbc05e68e1063bf9ad8074626bf5f88f4a6f77bd
5
5
  SHA512:
6
- metadata.gz: 8e3c1c07283730e722c450acc308a497756fd501595a02a7fc066d0b3e59b96e1ab1e7941549293b02e41274b176772bdae3779a041eb28f8ae53f5c44308cc0
7
- data.tar.gz: 52e95027de7a595b2eabc49745a11f664e305c18f9926bc9d649642a92fea9846efdd23da699529795d80609b8871b00e77f9379449d2e4f6cb79ecbcf2785db
6
+ metadata.gz: 393fc138a279ee42c3de90c49bcc982e55860f74e2796d4c895d0f2f175894bcb1ec1bbe796811f896a16be9cc97943e1309cbe175bc029a510b4c51b2f700da
7
+ data.tar.gz: d16e8c87e7d12cc90cc1a4babb4873df8f553d9527e1d69a548a250ae0b240f79a6338070bbc88cbb0e23db48c23ef0393cd4b62e0ac673722ace81ce1564895
data/Gemfile CHANGED
@@ -15,4 +15,5 @@
15
15
  #==============================================================================
16
16
 
17
17
  source 'https://rubygems.org'
18
+ gem 'steep', github: 'akihikodaki/steep', branch: 'cld3'
18
19
  gemspec
data/README.md CHANGED
@@ -41,24 +41,6 @@ JRuby has a bug which prevents the feature detection. Apply the following
41
41
  change:
42
42
  https://github.com/jruby/jruby/pull/4118/commits/edad375ef4dcf195b19ce0afe4befac66468c736
43
43
 
44
- #### OpenBSD
45
- Ruby has a bug which recognizes non-fatal linker warnings as fatal. Apply the
46
- following patch to Ruby to workaround the bug.
47
-
48
- ```diff
49
- --- a/lib/mkmf.rb
50
- +++ b/lib/mkmf.rb
51
- @@ -657,7 +657,7 @@ def with_ldflags(flags)
52
- end
53
-
54
- def try_ldflags(flags, opts = {})
55
- - try_link(MAIN_DOES_NOTHING, flags, {:werror => true}.update(opts))
56
- + try_link(MAIN_DOES_NOTHING, flags, {:werror => false}.update(opts))
57
- end
58
-
59
- def append_ldflags(flags, *opts)
60
- ```
61
-
62
44
  ### Troubleshooting
63
45
  `gem install cld3` triggers native library building. If it fails, you are likely
64
46
  to be missing required facilities. Make sure C++ compiler and protocol buffers
data/cld3.gemspec CHANGED
@@ -16,7 +16,7 @@
16
16
 
17
17
  Gem::Specification.new do |gem|
18
18
  gem.name = "cld3"
19
- gem.version = "3.4.3"
19
+ gem.version = "3.4.4"
20
20
  gem.summary = "Compact Language Detector v3 (CLD3)"
21
21
  gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
22
22
  gem.license = "Apache-2.0"
@@ -27,7 +27,7 @@ Gem::Specification.new do |gem|
27
27
  gem.add_dependency "ffi", [ ">= 1.1.0", "< 1.16.0" ]
28
28
  gem.add_development_dependency "rbs", [ ">= 1.7.0", "< 1.8.0" ]
29
29
  gem.add_development_dependency "rspec", [ ">=3.0.0", "< 3.11.0" ]
30
- gem.add_development_dependency "steep", [ ">= 0.46.0", "< 0.47.0" ]
30
+ gem.add_development_dependency "steep", [ ">= 0.47.0", "< 0.48.0" ]
31
31
  gem.files = Dir[
32
32
  "Gemfile", "LICENSE", "LICENSE_CLD3", "README.md",
33
33
  "cld3.gemspec", "ext/**/*", "lib/**/*", "sig/**/*"
data/ext/cld3/base.o CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/ext/cld3/libcld3.so CHANGED
Binary file
Binary file
Binary file
data/ext/cld3/offsetmap.o CHANGED
Binary file
data/ext/cld3/registry.o CHANGED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/ext/cld3/utils.o CHANGED
Binary file
data/ext/cld3/workspace.o CHANGED
Binary file
data/lib/a.rb ADDED
@@ -0,0 +1,24 @@
1
+ require "cld3"
2
+
3
+ # Kafka text as an example + the word Velcro
4
+ text = "Πολυαγαπημένε πατέρα πρόσφατα Velcro με ρώτησες κάποια φορά γιατί ισχυρίζομαι πως σε φοβάμαι. Εγώ δεν ήξερα, ως συνήθως, τι να σου απαντήσω, εν μέρει ακριβώς λόγω του φόβου που νιώθω για σένα, εν μέρει επειδή στην αιτιολόγηση του φόβου αυτού συγκαταλέγονται πάρα πολλές λεπτομέρειες, που εν τη ρύμη του λόγου εγώ ούτε κατά το ήμισυ δεν θα μπορούσα να τις συγκρατήσω. Κι αν εδώ προσπαθώ να σου απαντήσω γραπτώς, μόνο ανολοκλήρωτο κατά πολύ θα αποβεί και τούτο, επειδή και κατά τη γραφή ο φόβος και οι συνέπειές του με κωλύουν έναντί σου κι επειδή το μέγεθος του υλικού εν γένει υπερβαίνει κατά πολύ τη μνήμη μου και το λογικό μου. Για σένα το ζήτημα αποδεικνυόταν πάντοτε πολύ απλό, τουλάχιστον στον βαθμό που μιλούσες εσύ γι’ αυτό ενώπιόν μου και, αδιακρίτως, ενώπιον πολλών άλλων. Εσένα σου φαινόταν να είναι κάπως έτσι: Εσύ εργαζόσουν σκληρά σ’ όλη σου τη ζωή, τα πάντα για τα παιδιά σου, προ πάντων για εμένα τα θυσίαζες, εγώ έκαμνα συνεπώς «ζωή χαρισάμενη», είχα πλήρη ελευθερία να μάθω ό,τι ήθελα, κανέναν λόγο δεν είχα να έχω έγνοιες για την καθημερινή διατροφή, να έχω έγνοιες συνεπώς εν γένει• εσύ αντ’ αυτών καμμίαν ευγνωμοσύνη δεν αξίωνες, γνωρίζεις «την ευγνωμοσύνη των παιδιών, αλλά εν τούτοις τουλάχιστον μια "
5
+ pp text.bytesize
6
+
7
+ 200.times { |i|
8
+ max_bytes = 500 + i * 10
9
+ cld3 = CLD3::NNetLanguageIdentifier.new("foo", max_bytes)
10
+
11
+ lang = cld3.find_language(text)
12
+ lang2 = cld3.find_top_n_most_freq_langs(text, 1)
13
+
14
+ puts "When max_bytes is #{max_bytes} probability is less than 0.999: #{lang.probability}" if lang.probability < 0.999
15
+
16
+ if lang.language != :el
17
+ puts "When max_bytes is #{max_bytes} then cld3::find_language returns #{lang.language},
18
+ find_top_n_most_freq_langs returns #{lang2.first.language}"
19
+ #pp lang
20
+ #pp lang2
21
+ end
22
+ }
23
+
24
+ puts "Size: #{text.length} - Bytesize: #{text.encode(Encoding::UTF_8).bytesize}"
data/lib/cld3.rb CHANGED
@@ -74,14 +74,15 @@ module CLD3
74
74
  # @type const Result: untyped
75
75
  Result = Struct.new(:language, :probability, :reliable?, :proportion, :byte_ranges)
76
76
 
77
- # The arguments are two String objects.
77
+ # The arguments are two Numeric objects.
78
78
  def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
79
+ raise ArgumentError if max_num_bytes <= 0 || min_num_bytes < 0 || min_num_bytes >= max_num_bytes
79
80
  @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(min_num_bytes, max_num_bytes))
80
81
  end
81
82
 
82
83
  # Finds the most likely language for the given text, along with additional
83
84
  # information (e.g., probability). The prediction is based on the first N
84
- # bytes where N is the minumum between the number of interchange valid UTF8
85
+ # bytes where N is the minimum between the number of interchange valid UTF8
85
86
  # bytes and +max_num_bytes_+. If N is less than +min_num_bytes_+ long, then
86
87
  # this function returns nil.
87
88
  # The argument is a String object.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.4.3
4
+ version: 3.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-25 00:00:00.000000000 Z
11
+ date: 2022-01-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -76,20 +76,20 @@ dependencies:
76
76
  requirements:
77
77
  - - ">="
78
78
  - !ruby/object:Gem::Version
79
- version: 0.46.0
79
+ version: 0.47.0
80
80
  - - "<"
81
81
  - !ruby/object:Gem::Version
82
- version: 0.47.0
82
+ version: 0.48.0
83
83
  type: :development
84
84
  prerelease: false
85
85
  version_requirements: !ruby/object:Gem::Requirement
86
86
  requirements:
87
87
  - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: 0.46.0
89
+ version: 0.47.0
90
90
  - - "<"
91
91
  - !ruby/object:Gem::Version
92
- version: 0.47.0
92
+ version: 0.48.0
93
93
  description: Compact Language Detector v3 (CLD3) is a neural network model for language
94
94
  identification.
95
95
  email: akihiko.odaki@gmail.com
@@ -199,6 +199,7 @@ files:
199
199
  - ext/cld3/workspace.cc
200
200
  - ext/cld3/workspace.h
201
201
  - ext/cld3/workspace.o
202
+ - lib/a.rb
202
203
  - lib/cld3.rb
203
204
  - lib/cld3/unstable.rb
204
205
  - sig/cld3.rbs