cld3 3.2.6 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ee3c6166247aaf958310ffa9976400fcfa5050eb1969dd17e186e3500dd06d9
4
- data.tar.gz: bfa75958e205683dfa2429d388bb40d18d237ef5c2b5877a9fd718489f95b7bd
3
+ metadata.gz: 44d2292a62861aa8551a46b69ebf0d55f518bf07ab7b20605ec7db61cd58d6c4
4
+ data.tar.gz: e706b2bc83c2f4915c95c0f9a9d60b8f4728626ca47f705cd3bd0dbc2c555c11
5
5
  SHA512:
6
- metadata.gz: ac1fb08ebf438995878bb7c992bc2e2a71adf9d0f06f01316121b6d8d48f5b8f2f1ea9a3f68f501dad6682168b7a3e16b0137be16cae7ad876d0bce9f6d866e7
7
- data.tar.gz: 40e1036c1c7e08af1caed6efd187a04bb8883f9ff427c61824193f72f58e696279611575f7a63b8cbd80fff2c9f20da644807e71be2ea281a8d870e3721410bd
6
+ metadata.gz: 1f70a575dbb1c18ceb0c9f79588e1cdd1d15a09fc2b0ea8cd0ea6dbc24837d2a2d9619bc555349552b64e9b3ec29b51e0dd062384a2f798ae27aa2ddd3803cb7
7
+ data.tar.gz: 59952aaf853da6e5acc5a1043f1ccf1777ed708848d9be7ba51e64fc0f1667a4f9319faaa118e07c1fb1ef2eac80f8cc0112fe5d9224b0c3fa733bb33221498c
data/README.md CHANGED
@@ -8,11 +8,11 @@ require 'cld3'
8
8
 
9
9
  cld3 = CLD3::NNetLanguageIdentifier.new(0, 1000)
10
10
 
11
- cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0>
11
+ cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0, byte_ranges=[]>
12
12
 
13
- cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0>
13
+ cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0, byte_ranges=[]>
14
14
 
15
- cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0>
15
+ cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0, byte_ranges=[]>
16
16
  ```
17
17
 
18
18
  ## Installation
@@ -16,7 +16,7 @@
16
16
 
17
17
  Gem::Specification.new do |gem|
18
18
  gem.name = "cld3"
19
- gem.version = "3.2.6"
19
+ gem.version = "3.3.0"
20
20
  gem.summary = "Compact Language Detector v3 (CLD3)"
21
21
  gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
22
22
  gem.license = "Apache-2.0"
@@ -26,42 +26,90 @@ limitations under the License.
26
26
  #define EXPORT __attribute__ ((visibility ("default")))
27
27
  #endif
28
28
 
29
- struct NNetLanguageIdentifier {
30
- chrome_lang_id::NNetLanguageIdentifier context;
31
- std::string language;
32
- };
33
-
34
29
  struct Result {
35
30
  struct {
36
31
  const char *data;
37
32
  std::size_t size;
38
33
  } language;
34
+ struct {
35
+ const chrome_lang_id::NNetLanguageIdentifier::SpanInfo *data;
36
+ std::size_t size;
37
+ } byte_ranges;
39
38
  float probability;
40
39
  float proportion;
41
40
  bool is_reliable;
42
41
  };
43
42
 
43
+ struct OwningResult {
44
+ OwningResult(chrome_lang_id::NNetLanguageIdentifier::Result&& result) {
45
+ references.language = std::move(result.language);
46
+ references.byte_ranges = std::move(result.byte_ranges);
47
+ plain.language.data = references.language.data();
48
+ plain.language.size = references.language.size();
49
+ plain.byte_ranges.data = references.byte_ranges.data();
50
+ plain.byte_ranges.size = references.byte_ranges.size();
51
+ plain.probability = result.probability;
52
+ plain.proportion = result.proportion;
53
+ plain.is_reliable = result.is_reliable;
54
+ }
55
+
56
+ Result plain;
57
+ struct {
58
+ std::string language;
59
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::SpanInfo> byte_ranges;
60
+ } references;
61
+ };
62
+
44
63
  extern "C" {
45
- EXPORT Result NNetLanguageIdentifier_find_language(void *pointer,
46
- const char *data,
47
- std::size_t size) {
48
- auto instance = static_cast<NNetLanguageIdentifier *>(pointer);
49
- auto result = instance->context.FindLanguage(std::string(data, size));
50
- instance->language = std::move(result.language);
51
-
52
- return Result {
53
- { instance->language.data(), instance->language.size() },
54
- result.probability,
55
- result.proportion,
56
- result.is_reliable
57
- };
64
+ EXPORT OwningResult *NNetLanguageIdentifier_find_language(
65
+ chrome_lang_id::NNetLanguageIdentifier *instance,
66
+ const char *data,
67
+ std::size_t size) {
68
+ return new OwningResult(instance->FindLanguage(std::string(data, size)));
58
69
  }
59
70
 
60
- EXPORT void delete_NNetLanguageIdentifier(void *pointer) {
61
- delete static_cast<NNetLanguageIdentifier *>(pointer);
71
+ EXPORT std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>*
72
+ NNetLanguageIdentifier_find_top_n_most_freq_langs(
73
+ chrome_lang_id::NNetLanguageIdentifier *instance,
74
+ const char *data, std::size_t size, int num_langs) {
75
+ std::string text(data, size);
76
+ return new auto(instance->FindTopNMostFreqLangs(text, num_langs));
62
77
  }
63
78
 
64
- EXPORT void *new_NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes) {
65
- return new NNetLanguageIdentifier{{min_num_bytes, max_num_bytes}, {}};
79
+ EXPORT void delete_NNetLanguageIdentifier(
80
+ chrome_lang_id::NNetLanguageIdentifier *pointer) {
81
+ delete pointer;
82
+ }
83
+
84
+ EXPORT void delete_result(OwningResult *pointer) {
85
+ delete pointer;
86
+ }
87
+
88
+ EXPORT void delete_results(
89
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *pointer) {
90
+ delete pointer;
91
+ }
92
+
93
+ EXPORT chrome_lang_id::NNetLanguageIdentifier *new_NNetLanguageIdentifier(
94
+ int min_num_bytes, int max_num_bytes) {
95
+ return new chrome_lang_id::NNetLanguageIdentifier(
96
+ min_num_bytes, max_num_bytes);
97
+ }
98
+
99
+ EXPORT Result refer_to_nth_result(
100
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *results,
101
+ std::size_t index) {
102
+ Result c;
103
+ auto& cc = (*results)[index];
104
+
105
+ c.language.data = cc.language.data();
106
+ c.language.size = cc.language.size();
107
+ c.byte_ranges.data = cc.byte_ranges.data();
108
+ c.byte_ranges.size = cc.byte_ranges.size();
109
+ c.probability = cc.probability;
110
+ c.proportion = cc.proportion;
111
+ c.is_reliable = cc.is_reliable;
112
+
113
+ return c;
66
114
  }
67
115
  }
@@ -49,10 +49,15 @@ module CLD3
49
49
  # This is Numeric object.
50
50
  RELIABILITY_HR_BS_THRESHOLD = 0.5
51
51
 
52
+ # Holds probability that Span, specified by start/end indices, is a given
53
+ # language. The language is not stored here; it can be found in Result, which
54
+ # holds an Array of SpanInfo.
55
+ SpanInfo = Struct.new(:start_index, :end_index, :probability)
56
+
52
57
  # Information about a predicted language.
53
58
  # This is an instance of Struct with the following members:
54
59
  #
55
- # [language] This is symbol or nil.
60
+ # [language] This is symbol.
56
61
  #
57
62
  # [probability] Language probability. This is Numeric object.
58
63
  #
@@ -61,33 +66,95 @@ module CLD3
61
66
  # [proportion] Proportion of bytes associated with the language. If
62
67
  # #find_language is called, this variable is set to 1.
63
68
  # This is Numeric object.
64
- Result = Struct.new("Result", :language, :probability, :reliable?, :proportion)
69
+ #
70
+ # [byte_ranges] Specifies the byte ranges in UTF-8 that |language| applies to.
71
+ # This is an Array of SpanInfo.
72
+ Result = Struct.new(:language, :probability, :reliable?, :proportion, :byte_ranges)
65
73
 
66
74
  # The arguments are two Numeric objects.
67
- def initialize(minNumBytes = MIN_NUM_BYTES_TO_CONSIDER, maxNumBytes = MAX_NUM_BYTES_TO_CONSIDER)
68
- @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(minNumBytes, maxNumBytes))
75
+ def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
76
+ @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(min_num_bytes, max_num_bytes))
69
77
  end
70
78
 
71
79
  # Finds the most likely language for the given text, along with additional
72
80
  # information (e.g., probability). The prediction is based on the first N
73
81
  # bytes where N is the minimum between the number of interchange valid UTF8
74
82
  # bytes and +max_num_bytes_+. If N is less than +min_num_bytes_+ long, then
75
- # this function returns nil as language.
83
+ # this function returns nil.
76
84
  # The argument is a String object.
77
85
  # The returned value of this function is an instance of Result.
78
86
  def find_language(text)
79
87
  text_utf8 = text.encode(Encoding::UTF_8)
80
88
  pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
81
- pointer.put_bytes(0, text_utf8)
82
89
 
83
- cc_result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
84
- language = cc_result[:language_data].read_bytes(cc_result[:language_size])
90
+ begin
91
+ pointer.put_bytes(0, text_utf8)
92
+
93
+ result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
94
+ begin
95
+ convert_result Unstable::NNetLanguageIdentifier::Result.new(result)
96
+ ensure
97
+ Unstable.delete_result result
98
+ end
99
+ ensure
100
+ pointer.free
101
+ end
102
+ end
103
+
104
+ # Splits the input text (up to the first byte, if any, that is not
105
+ # interchange valid UTF8) into spans based on the script, predicts a language
106
+ # for each span, and returns a vector storing the top num_langs most frequent
107
+ # languages along with additional information (e.g., proportions). The number
108
+ # of bytes considered for each span is the minimum between the size of the
109
+ # span and +max_num_bytes_+. If more languages are requested than what is
110
+ # available in the input, then the number of the returned elements will be
111
+ # the number of the latter. Also, if the size of the span is less than
112
+ # +min_num_bytes_+ long, then the span is skipped. If the input text is too
113
+ # long, only the first +MAX_NUM_INPUT_BYTES_TO_CONSIDER+ bytes are processed.
114
+ # The first argument is a String object.
115
+ # The second argument is Numeric object.
116
+ # The returned value of this function is an Array of Result instances.
117
+ def find_top_n_most_freq_langs(text, num_langs)
118
+ text_utf8 = text.encode(Encoding::UTF_8)
119
+ pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
120
+
121
+ begin
122
+ pointer.put_bytes(0, text_utf8)
123
+
124
+ results = Unstable.NNetLanguageIdentifier_find_top_n_most_freq_langs(@cc, pointer, text_utf8.bytesize, num_langs)
125
+ begin
126
+ num_langs.times
127
+ .lazy
128
+ .map { |index| convert_result Unstable.refer_to_nth_result(results, index) }
129
+ .take_while { |result| !result.nil? }
130
+ .to_a
131
+ ensure
132
+ Unstable.delete_results results
133
+ end
134
+ ensure
135
+ pointer.free
136
+ end
137
+ end
138
+
139
+ private
140
+
141
+ def convert_result(result)
142
+ language = result[:language_data].read_bytes(result[:language_size])
143
+ return nil if language == "und"
144
+
145
+ cursor = result[:byte_ranges_data]
146
+ byte_ranges = result[:byte_ranges_size].times.map do
147
+ info = Unstable::NNetLanguageIdentifier::SpanInfo.new(cursor)
148
+ cursor += Unstable::NNetLanguageIdentifier::SpanInfo.size
149
+ SpanInfo.new(info[:start_index], info[:end_index], info[:probability])
150
+ end
85
151
 
86
152
  Result.new(
87
- language == "und" ? nil : language.to_sym,
88
- cc_result[:probability],
89
- cc_result[:reliable?],
90
- cc_result[:proportion])
153
+ language.to_sym,
154
+ result[:probability],
155
+ result[:reliable?],
156
+ result[:proportion],
157
+ byte_ranges)
91
158
  end
92
159
  end
93
160
 
@@ -121,17 +188,30 @@ module CLD3
121
188
  end
122
189
  end
123
190
 
191
+ class SpanInfo < FFI::Struct
192
+ layout :start_index, :int, :end_index, :int, :probability, :float
193
+ end
194
+
124
195
  class Result < FFI::Struct
125
- layout :language_data, :pointer, :language_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
196
+ layout :language_data, :pointer, :language_size, :size_t, :byte_ranges_data, :pointer, :byte_ranges_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
126
197
  end
127
198
  end
128
199
 
129
200
  attach_function :delete_NNetLanguageIdentifier, [ :pointer ], :void
130
201
 
202
+ attach_function :delete_result, [ :pointer ], :void
203
+
204
+ attach_function :delete_results, [ :pointer ], :void
205
+
131
206
  attach_function :new_NNetLanguageIdentifier, [ :int, :int ], :pointer
132
207
 
208
+ attach_function :refer_to_nth_result, [ :pointer, :size_t ], NNetLanguageIdentifier::Result.by_value
209
+
133
210
  attach_function :NNetLanguageIdentifier_find_language,
134
- [ :pointer, :buffer_in, :size_t ], NNetLanguageIdentifier::Result.by_value
211
+ [ :pointer, :buffer_in, :size_t ], :pointer
212
+
213
+ attach_function :NNetLanguageIdentifier_find_top_n_most_freq_langs,
214
+ [ :pointer, :buffer_in, :size_t, :int ], :pointer
135
215
  end
136
216
 
137
217
  private_constant :Unstable
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-04 00:00:00.000000000 Z
11
+ date: 2020-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -63,101 +63,70 @@ files:
63
63
  - LICENSE_CLD3
64
64
  - README.md
65
65
  - cld3.gemspec
66
- - ext/cld3/Makefile
67
66
  - ext/cld3/base.cc
68
67
  - ext/cld3/base.h
69
- - ext/cld3/base.o
70
68
  - ext/cld3/casts.h
71
69
  - ext/cld3/embedding_feature_extractor.cc
72
70
  - ext/cld3/embedding_feature_extractor.h
73
- - ext/cld3/embedding_feature_extractor.o
74
71
  - ext/cld3/embedding_network.cc
75
72
  - ext/cld3/embedding_network.h
76
- - ext/cld3/embedding_network.o
77
73
  - ext/cld3/embedding_network_params.h
78
74
  - ext/cld3/extconf.rb
79
75
  - ext/cld3/feature_extractor.cc
80
76
  - ext/cld3/feature_extractor.h
81
- - ext/cld3/feature_extractor.o
82
- - ext/cld3/feature_extractor.pb.o
83
77
  - ext/cld3/feature_extractor.proto
84
78
  - ext/cld3/feature_types.cc
85
79
  - ext/cld3/feature_types.h
86
- - ext/cld3/feature_types.o
87
80
  - ext/cld3/fixunicodevalue.cc
88
81
  - ext/cld3/fixunicodevalue.h
89
- - ext/cld3/fixunicodevalue.o
90
82
  - ext/cld3/float16.h
91
83
  - ext/cld3/fml_parser.cc
92
84
  - ext/cld3/fml_parser.h
93
- - ext/cld3/fml_parser.o
94
85
  - ext/cld3/generated_entities.cc
95
- - ext/cld3/generated_entities.o
96
86
  - ext/cld3/generated_ulscript.cc
97
87
  - ext/cld3/generated_ulscript.h
98
- - ext/cld3/generated_ulscript.o
99
88
  - ext/cld3/getonescriptspan.cc
100
89
  - ext/cld3/getonescriptspan.h
101
- - ext/cld3/getonescriptspan.o
102
90
  - ext/cld3/integral_types.h
103
91
  - ext/cld3/lang_id_nn_params.cc
104
92
  - ext/cld3/lang_id_nn_params.h
105
- - ext/cld3/lang_id_nn_params.o
106
93
  - ext/cld3/language_identifier_features.cc
107
94
  - ext/cld3/language_identifier_features.h
108
- - ext/cld3/language_identifier_features.o
109
- - ext/cld3/libcld3.so
110
- - ext/cld3/mkmf.log
111
95
  - ext/cld3/nnet_language_identifier.cc
112
96
  - ext/cld3/nnet_language_identifier.h
113
- - ext/cld3/nnet_language_identifier.o
114
97
  - ext/cld3/nnet_language_identifier_c.cc
115
- - ext/cld3/nnet_language_identifier_c.o
116
98
  - ext/cld3/offsetmap.cc
117
99
  - ext/cld3/offsetmap.h
118
- - ext/cld3/offsetmap.o
119
100
  - ext/cld3/port.h
120
101
  - ext/cld3/registry.cc
121
102
  - ext/cld3/registry.h
122
- - ext/cld3/registry.o
123
103
  - ext/cld3/relevant_script_feature.cc
124
104
  - ext/cld3/relevant_script_feature.h
125
- - ext/cld3/relevant_script_feature.o
126
105
  - ext/cld3/script_detector.h
127
- - ext/cld3/sentence.pb.o
128
106
  - ext/cld3/sentence.proto
129
107
  - ext/cld3/sentence_features.cc
130
108
  - ext/cld3/sentence_features.h
131
- - ext/cld3/sentence_features.o
132
109
  - ext/cld3/simple_adder.h
133
110
  - ext/cld3/stringpiece.h
134
111
  - ext/cld3/task_context.cc
135
112
  - ext/cld3/task_context.h
136
- - ext/cld3/task_context.o
137
113
  - ext/cld3/task_context_params.cc
138
114
  - ext/cld3/task_context_params.h
139
- - ext/cld3/task_context_params.o
140
- - ext/cld3/task_spec.pb.o
141
115
  - ext/cld3/task_spec.proto
142
116
  - ext/cld3/text_processing.cc
143
117
  - ext/cld3/text_processing.h
144
- - ext/cld3/text_processing.o
145
118
  - ext/cld3/unicodetext.cc
146
119
  - ext/cld3/unicodetext.h
147
- - ext/cld3/unicodetext.o
148
120
  - ext/cld3/utf8acceptinterchange.h
149
121
  - ext/cld3/utf8prop_lettermarkscriptnum.h
150
122
  - ext/cld3/utf8repl_lettermarklower.h
151
123
  - ext/cld3/utf8scannot_lettermarkspecial.h
152
124
  - ext/cld3/utf8statetable.cc
153
125
  - ext/cld3/utf8statetable.h
154
- - ext/cld3/utf8statetable.o
155
126
  - ext/cld3/utils.cc
156
127
  - ext/cld3/utils.h
157
- - ext/cld3/utils.o
158
128
  - ext/cld3/workspace.cc
159
129
  - ext/cld3/workspace.h
160
- - ext/cld3/workspace.o
161
130
  - lib/cld3.rb
162
131
  homepage: https://github.com/akihikodaki/cld3-ruby
163
132
  licenses:
@@ -181,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
150
  - !ruby/object:Gem::Version
182
151
  version: '0'
183
152
  requirements: []
184
- rubygems_version: 3.0.6
153
+ rubygems_version: 3.1.2
185
154
  signing_key:
186
155
  specification_version: 4
187
156
  summary: Compact Language Detector v3 (CLD3)
@@ -1,266 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- Q1 = $(V:1=)
7
- Q = $(Q1:0=@)
8
- ECHO1 = $(V:1=@ :)
9
- ECHO = $(ECHO1:0=@ echo)
10
- NULLCMD = :
11
-
12
- #### Start of system configuration section. ####
13
-
14
- srcdir = .
15
- topdir = /usr/include/ruby-2.6.0
16
- hdrdir = $(topdir)
17
- arch_hdrdir = /usr/include/ruby-2.6.0/x86_64-linux
18
- PATH_SEPARATOR = :
19
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
20
- prefix = $(DESTDIR)/usr
21
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
22
- rubyarchprefix = $(rubylibprefix)/$(arch)
23
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
24
- exec_prefix = $(prefix)
25
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
26
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
27
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
28
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
29
- sitehdrdir = $(rubyhdrdir)/site_ruby
30
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
31
- vendorarchdir = $(vendorlibdir)/$(sitearch)
32
- vendorlibdir = $(vendordir)/$(ruby_version)
33
- vendordir = $(rubylibprefix)/vendor_ruby
34
- sitearchdir = $(sitelibdir)/$(sitearch)
35
- sitelibdir = $(sitedir)/$(ruby_version)
36
- sitedir = $(rubylibprefix)/site_ruby
37
- rubyarchdir = $(rubylibdir)/$(arch)
38
- rubylibdir = $(rubylibprefix)/$(ruby_version)
39
- sitearchincludedir = $(includedir)/$(sitearch)
40
- archincludedir = $(includedir)/$(arch)
41
- sitearchlibdir = $(libdir)/$(sitearch)
42
- archlibdir = $(libdir)/$(arch)
43
- ridir = $(datarootdir)/$(RI_BASE_NAME)
44
- mandir = $(datarootdir)/man
45
- localedir = $(datarootdir)/locale
46
- libdir = $(exec_prefix)/lib
47
- psdir = $(docdir)
48
- pdfdir = $(docdir)
49
- dvidir = $(docdir)
50
- htmldir = $(docdir)
51
- infodir = $(datarootdir)/info
52
- docdir = $(datarootdir)/doc/$(PACKAGE)
53
- oldincludedir = $(DESTDIR)/usr/include
54
- includedir = $(prefix)/include
55
- runstatedir = $(localstatedir)/run
56
- localstatedir = $(DESTDIR)/var
57
- sharedstatedir = $(DESTDIR)/var/lib
58
- sysconfdir = $(DESTDIR)/etc
59
- datadir = $(datarootdir)
60
- datarootdir = $(prefix)/share
61
- libexecdir = $(DESTDIR)/usr/lib/ruby
62
- sbindir = $(exec_prefix)/sbin
63
- bindir = $(exec_prefix)/bin
64
- archdir = $(rubyarchdir)
65
-
66
-
67
- CC_WRAPPER =
68
- CC = gcc
69
- CXX = g++
70
- LIBRUBY = $(LIBRUBY_SO)
71
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
72
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
73
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
74
- empty =
75
- OUTFLAG = -o $(empty)
76
- COUTFLAG = -o $(empty)
77
- CSRCFLAG = $(empty)
78
-
79
- RUBY_EXTCONF_H =
80
- cflags = $(optflags) $(debugflags) $(warnflags)
81
- cxxflags = $(optflags) $(debugflags) $(warnflags)
82
- optflags = -O3
83
- debugflags = -ggdb3
84
- warnflags = -Wall -Wextra -Wdeclaration-after-statement -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wrestrict -Wwrite-strings -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable
85
- cppflags =
86
- CCDLFLAGS = -fPIC
87
- CFLAGS = $(CCDLFLAGS) -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC -pthread $(ARCH_FLAG)
88
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
89
- DEFS =
90
- CPPFLAGS = -D_FORTIFY_SOURCE=2 $(DEFS) $(cppflags)
91
- CXXFLAGS = $(CCDLFLAGS) -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -pthread -fvisibility=hidden -std=c++11 $(ARCH_FLAG)
92
- ldflags = -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic
93
- dldflags = -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -Wl,--compress-debug-sections=zlib
94
- ARCH_FLAG =
95
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
96
- LDSHARED = $(CC) -shared
97
- LDSHAREDXX = $(CXX) -shared
98
- AR = ar
99
- EXEEXT =
100
-
101
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
102
- RUBY_SO_NAME = ruby
103
- RUBYW_INSTALL_NAME =
104
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
105
- RUBYW_BASE_NAME = rubyw
106
- RUBY_BASE_NAME = ruby
107
-
108
- arch = x86_64-linux
109
- sitearch = $(arch)
110
- ruby_version = 2.6.0
111
- ruby = $(bindir)/$(RUBY_BASE_NAME)
112
- RUBY = $(ruby)
113
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
114
-
115
- RM = rm -f
116
- RM_RF = $(RUBY) -run -e rm -- -rf
117
- RMDIRS = rmdir --ignore-fail-on-non-empty -p
118
- MAKEDIRS = /usr/bin/mkdir -p
119
- INSTALL = /usr/bin/install -c
120
- INSTALL_PROG = $(INSTALL) -m 0755
121
- INSTALL_DATA = $(INSTALL) -m 644
122
- COPY = cp
123
- TOUCH = exit >
124
-
125
- #### End of system configuration section. ####
126
-
127
- preload =
128
- libpath = . $(libdir)
129
- LIBPATH = -L. -L$(libdir)
130
- DEFFILE =
131
-
132
- CLEANFILES = mkmf.log
133
- DISTCLEANFILES =
134
- DISTCLEANDIRS =
135
-
136
- extout =
137
- extout_prefix =
138
- target_prefix =
139
- LOCAL_LIBS =
140
- LIBS = $(LIBRUBYARG_SHARED) -lprotobuf -lm -lc
141
- ORIG_SRCS = base.cc embedding_feature_extractor.cc embedding_network.cc feature_extractor.cc feature_extractor.pb.cc feature_types.cc fixunicodevalue.cc fml_parser.cc generated_entities.cc generated_ulscript.cc getonescriptspan.cc lang_id_nn_params.cc language_identifier_features.cc nnet_language_identifier.cc nnet_language_identifier_c.cc offsetmap.cc registry.cc relevant_script_feature.cc sentence.pb.cc sentence_features.cc task_context.cc task_context_params.cc task_spec.pb.cc text_processing.cc unicodetext.cc utf8statetable.cc utils.cc workspace.cc
142
- SRCS = $(ORIG_SRCS)
143
- OBJS = base.o embedding_feature_extractor.o embedding_network.o feature_extractor.o feature_extractor.pb.o feature_types.o fixunicodevalue.o fml_parser.o generated_entities.o generated_ulscript.o getonescriptspan.o lang_id_nn_params.o language_identifier_features.o nnet_language_identifier.o nnet_language_identifier_c.o offsetmap.o registry.o relevant_script_feature.o sentence.pb.o sentence_features.o task_context.o task_context_params.o task_spec.pb.o text_processing.o unicodetext.o utf8statetable.o utils.o workspace.o
144
- HDRS = $(srcdir)/base.h $(srcdir)/casts.h $(srcdir)/embedding_feature_extractor.h $(srcdir)/embedding_network.h $(srcdir)/embedding_network_params.h $(srcdir)/feature_extractor.h $(srcdir)/feature_types.h $(srcdir)/float16.h $(srcdir)/fml_parser.h $(srcdir)/language_identifier_features.h $(srcdir)/lang_id_nn_params.h $(srcdir)/nnet_language_identifier.h $(srcdir)/registry.h $(srcdir)/relevant_script_feature.h $(srcdir)/script_detector.h $(srcdir)/sentence_features.h $(srcdir)/simple_adder.h $(srcdir)/fixunicodevalue.h $(srcdir)/generated_ulscript.h $(srcdir)/getonescriptspan.h $(srcdir)/integral_types.h $(srcdir)/offsetmap.h $(srcdir)/port.h $(srcdir)/stringpiece.h $(srcdir)/text_processing.h $(srcdir)/utf8acceptinterchange.h $(srcdir)/utf8prop_lettermarkscriptnum.h $(srcdir)/utf8repl_lettermarklower.h $(srcdir)/utf8scannot_lettermarkspecial.h $(srcdir)/utf8statetable.h $(srcdir)/task_context.h $(srcdir)/task_context_params.h $(srcdir)/unicodetext.h $(srcdir)/utils.h $(srcdir)/workspace.h $(srcdir)/feature_extractor.pb.h $(srcdir)/sentence.pb.h $(srcdir)/task_spec.pb.h
145
- LOCAL_HDRS =
146
- TARGET = libcld3
147
- TARGET_NAME = libcld3
148
- TARGET_ENTRY = Init_$(TARGET_NAME)
149
- DLLIB = $(TARGET).so
150
- EXTSTATIC =
151
- STATIC_LIB =
152
-
153
- TIMESTAMP_DIR = .
154
- BINDIR = $(bindir)
155
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
156
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
157
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
158
- HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
159
- ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
160
- TARGET_SO_DIR =
161
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
162
- CLEANLIBS = $(TARGET_SO)
163
- CLEANOBJS = *.o *.bak
164
-
165
- all: $(DLLIB)
166
- static: $(STATIC_LIB)
167
- .PHONY: all install static install-so install-rb
168
- .PHONY: clean clean-so clean-static clean-rb
169
-
170
- clean-static::
171
- clean-rb-default::
172
- clean-rb::
173
- clean-so::
174
- clean: clean-so clean-static clean-rb-default clean-rb
175
- -$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
176
-
177
- distclean-rb-default::
178
- distclean-rb::
179
- distclean-so::
180
- distclean-static::
181
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
182
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
183
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
184
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
185
-
186
- realclean: distclean
187
- install: install-so install-rb
188
-
189
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
190
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
191
- clean-static::
192
- -$(Q)$(RM) $(STATIC_LIB)
193
- install-rb: pre-install-rb do-install-rb install-rb-default
194
- install-rb-default: pre-install-rb-default do-install-rb-default
195
- pre-install-rb: Makefile
196
- pre-install-rb-default: Makefile
197
- do-install-rb:
198
- do-install-rb-default:
199
- pre-install-rb-default:
200
- @$(NULLCMD)
201
- $(TIMESTAMP_DIR)/.sitearchdir.time:
202
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
203
- $(Q) $(TOUCH) $@
204
-
205
- site-install: site-install-so site-install-rb
206
- site-install-so: install-so
207
- site-install-rb: install-rb
208
-
209
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
210
-
211
- .cc.o:
212
- $(ECHO) compiling $(<)
213
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
214
-
215
- .cc.S:
216
- $(ECHO) translating $(<)
217
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
218
-
219
- .mm.o:
220
- $(ECHO) compiling $(<)
221
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
222
-
223
- .mm.S:
224
- $(ECHO) translating $(<)
225
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
226
-
227
- .cxx.o:
228
- $(ECHO) compiling $(<)
229
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
230
-
231
- .cxx.S:
232
- $(ECHO) translating $(<)
233
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
234
-
235
- .cpp.o:
236
- $(ECHO) compiling $(<)
237
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
238
-
239
- .cpp.S:
240
- $(ECHO) translating $(<)
241
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
242
-
243
- .c.o:
244
- $(ECHO) compiling $(<)
245
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
246
-
247
- .c.S:
248
- $(ECHO) translating $(<)
249
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
250
-
251
- .m.o:
252
- $(ECHO) compiling $(<)
253
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
254
-
255
- .m.S:
256
- $(ECHO) translating $(<)
257
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
258
-
259
- $(TARGET_SO): $(OBJS) Makefile
260
- $(ECHO) linking shared-object $(DLLIB)
261
- -$(Q)$(RM) $(@)
262
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
263
-
264
-
265
-
266
- $(OBJS): $(HDRS) $(ruby_headers)
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,36 +0,0 @@
1
- "pkg-config --exists protobuf"
2
- | pkg-config --libs protobuf
3
- => "-lprotobuf \n"
4
- "gcc -o conftest -I/usr/include/ruby-2.6.0/x86_64-linux -I/usr/include/ruby-2.6.0/ruby/backward -I/usr/include/ruby-2.6.0 -I. -D_FORTIFY_SOURCE=2 -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC conftest.c -L. -L/usr/lib -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lm -lc"
5
- checked program was:
6
- /* begin */
7
- 1: #include "ruby.h"
8
- 2:
9
- 3: int main(int argc, char **argv)
10
- 4: {
11
- 5: return 0;
12
- 6: }
13
- /* end */
14
-
15
- "gcc -o conftest -I/usr/include/ruby-2.6.0/x86_64-linux -I/usr/include/ruby-2.6.0/ruby/backward -I/usr/include/ruby-2.6.0 -I. -D_FORTIFY_SOURCE=2 -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC conftest.c -L. -L/usr/lib -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lprotobuf -lm -lc"
16
- checked program was:
17
- /* begin */
18
- 1: #include "ruby.h"
19
- 2:
20
- 3: int main(int argc, char **argv)
21
- 4: {
22
- 5: return 0;
23
- 6: }
24
- /* end */
25
-
26
- | pkg-config --cflags-only-I protobuf
27
- => "\n"
28
- | pkg-config --cflags-only-other protobuf
29
- => "-pthread \n"
30
- | pkg-config --libs-only-l protobuf
31
- => "-lprotobuf \n"
32
- package configuration for protobuf
33
- cflags: -pthread
34
- ldflags:
35
- libs: -lprotobuf
36
-
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file