cld3 3.2.6 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0ee3c6166247aaf958310ffa9976400fcfa5050eb1969dd17e186e3500dd06d9
4
- data.tar.gz: bfa75958e205683dfa2429d388bb40d18d237ef5c2b5877a9fd718489f95b7bd
3
+ metadata.gz: 44d2292a62861aa8551a46b69ebf0d55f518bf07ab7b20605ec7db61cd58d6c4
4
+ data.tar.gz: e706b2bc83c2f4915c95c0f9a9d60b8f4728626ca47f705cd3bd0dbc2c555c11
5
5
  SHA512:
6
- metadata.gz: ac1fb08ebf438995878bb7c992bc2e2a71adf9d0f06f01316121b6d8d48f5b8f2f1ea9a3f68f501dad6682168b7a3e16b0137be16cae7ad876d0bce9f6d866e7
7
- data.tar.gz: 40e1036c1c7e08af1caed6efd187a04bb8883f9ff427c61824193f72f58e696279611575f7a63b8cbd80fff2c9f20da644807e71be2ea281a8d870e3721410bd
6
+ metadata.gz: 1f70a575dbb1c18ceb0c9f79588e1cdd1d15a09fc2b0ea8cd0ea6dbc24837d2a2d9619bc555349552b64e9b3ec29b51e0dd062384a2f798ae27aa2ddd3803cb7
7
+ data.tar.gz: 59952aaf853da6e5acc5a1043f1ccf1777ed708848d9be7ba51e64fc0f1667a4f9319faaa118e07c1fb1ef2eac80f8cc0112fe5d9224b0c3fa733bb33221498c
data/README.md CHANGED
@@ -8,11 +8,11 @@ require 'cld3'
8
8
 
9
9
  cld3 = CLD3::NNetLanguageIdentifier.new(0, 1000)
10
10
 
11
- cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0>
11
+ cld3.find_language("こんにちは") # => #<struct Struct::Result language=:ja, probability=1.0, reliable?=true, proportion=1.0, byte_ranges=[]>
12
12
 
13
- cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0>
13
+ cld3.find_language("This is a pen.") # => #<struct Struct::Result language=:en, probability=0.9999408721923828, reliable?=true, proportion=1.0, byte_ranges=[]>
14
14
 
15
- cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0>
15
+ cld3.find_language("здравствуйте") # => #<struct Struct::Result language=:ru, probability=0.3140212297439575, reliable?=false, proportion=1.0, byte_ranges=[]>
16
16
  ```
17
17
 
18
18
  ## Installation
@@ -16,7 +16,7 @@
16
16
 
17
17
  Gem::Specification.new do |gem|
18
18
  gem.name = "cld3"
19
- gem.version = "3.2.6"
19
+ gem.version = "3.3.0"
20
20
  gem.summary = "Compact Language Detector v3 (CLD3)"
21
21
  gem.description = "Compact Language Detector v3 (CLD3) is a neural network model for language identification."
22
22
  gem.license = "Apache-2.0"
@@ -26,42 +26,90 @@ limitations under the License.
26
26
  #define EXPORT __attribute__ ((visibility ("default")))
27
27
  #endif
28
28
 
29
- struct NNetLanguageIdentifier {
30
- chrome_lang_id::NNetLanguageIdentifier context;
31
- std::string language;
32
- };
33
-
34
29
  struct Result {
35
30
  struct {
36
31
  const char *data;
37
32
  std::size_t size;
38
33
  } language;
34
+ struct {
35
+ const chrome_lang_id::NNetLanguageIdentifier::SpanInfo *data;
36
+ std::size_t size;
37
+ } byte_ranges;
39
38
  float probability;
40
39
  float proportion;
41
40
  bool is_reliable;
42
41
  };
43
42
 
43
+ struct OwningResult {
44
+ OwningResult(chrome_lang_id::NNetLanguageIdentifier::Result&& result) {
45
+ references.language = std::move(result.language);
46
+ references.byte_ranges = std::move(result.byte_ranges);
47
+ plain.language.data = references.language.data();
48
+ plain.language.size = references.language.size();
49
+ plain.byte_ranges.data = references.byte_ranges.data();
50
+ plain.byte_ranges.size = references.byte_ranges.size();
51
+ plain.probability = result.probability;
52
+ plain.proportion = result.proportion;
53
+ plain.is_reliable = result.is_reliable;
54
+ }
55
+
56
+ Result plain;
57
+ struct {
58
+ std::string language;
59
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::SpanInfo> byte_ranges;
60
+ } references;
61
+ };
62
+
44
63
  extern "C" {
45
- EXPORT Result NNetLanguageIdentifier_find_language(void *pointer,
46
- const char *data,
47
- std::size_t size) {
48
- auto instance = static_cast<NNetLanguageIdentifier *>(pointer);
49
- auto result = instance->context.FindLanguage(std::string(data, size));
50
- instance->language = std::move(result.language);
51
-
52
- return Result {
53
- { instance->language.data(), instance->language.size() },
54
- result.probability,
55
- result.proportion,
56
- result.is_reliable
57
- };
64
+ EXPORT OwningResult *NNetLanguageIdentifier_find_language(
65
+ chrome_lang_id::NNetLanguageIdentifier *instance,
66
+ const char *data,
67
+ std::size_t size) {
68
+ return new OwningResult(instance->FindLanguage(std::string(data, size)));
58
69
  }
59
70
 
60
- EXPORT void delete_NNetLanguageIdentifier(void *pointer) {
61
- delete static_cast<NNetLanguageIdentifier *>(pointer);
71
+ EXPORT std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>*
72
+ NNetLanguageIdentifier_find_top_n_most_freq_langs(
73
+ chrome_lang_id::NNetLanguageIdentifier *instance,
74
+ const char *data, std::size_t size, int num_langs) {
75
+ std::string text(data, size);
76
+ return new auto(instance->FindTopNMostFreqLangs(text, num_langs));
62
77
  }
63
78
 
64
- EXPORT void *new_NNetLanguageIdentifier(int min_num_bytes, int max_num_bytes) {
65
- return new NNetLanguageIdentifier{{min_num_bytes, max_num_bytes}, {}};
79
+ EXPORT void delete_NNetLanguageIdentifier(
80
+ chrome_lang_id::NNetLanguageIdentifier *pointer) {
81
+ delete pointer;
82
+ }
83
+
84
+ EXPORT void delete_result(OwningResult *pointer) {
85
+ delete pointer;
86
+ }
87
+
88
+ EXPORT void delete_results(
89
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *pointer) {
90
+ delete pointer;
91
+ }
92
+
93
+ EXPORT chrome_lang_id::NNetLanguageIdentifier *new_NNetLanguageIdentifier(
94
+ int min_num_bytes, int max_num_bytes) {
95
+ return new chrome_lang_id::NNetLanguageIdentifier(
96
+ min_num_bytes, max_num_bytes);
97
+ }
98
+
99
+ EXPORT Result refer_to_nth_result(
100
+ std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> *results,
101
+ std::size_t index) {
102
+ Result c;
103
+ auto& cc = (*results)[index];
104
+
105
+ c.language.data = cc.language.data();
106
+ c.language.size = cc.language.size();
107
+ c.byte_ranges.data = cc.byte_ranges.data();
108
+ c.byte_ranges.size = cc.byte_ranges.size();
109
+ c.probability = cc.probability;
110
+ c.proportion = cc.proportion;
111
+ c.is_reliable = cc.is_reliable;
112
+
113
+ return c;
66
114
  }
67
115
  }
@@ -49,10 +49,15 @@ module CLD3
49
49
  # This is Numeric object.
50
50
  RELIABILITY_HR_BS_THRESHOLD = 0.5
51
51
 
52
+ # Holds probability that Span, specified by start/end indices, is a given
53
+ # language. The language is not stored here; it can be found in Result, which
54
+ # holds an Array of SpanInfo.
55
+ SpanInfo = Struct.new(:start_index, :end_index, :probability)
56
+
52
57
  # Information about a predicted language.
53
58
  # This is an instance of Struct with the following members:
54
59
  #
55
- # [language] This is symbol or nil.
60
+ # [language] This is symbol.
56
61
  #
57
62
  # [probability] Language probability. This is Numeric object.
58
63
  #
@@ -61,33 +66,95 @@ module CLD3
61
66
  # [proportion] Proportion of bytes associated with the language. If
62
67
  # #find_language is called, this variable is set to 1.
63
68
  # This is Numeric object.
64
- Result = Struct.new("Result", :language, :probability, :reliable?, :proportion)
69
+ #
70
+ # [byte_ranges] Specifies the byte ranges in UTF-8 that |language| applies to.
71
+ # This is an Array of SpanInfo.
72
+ Result = Struct.new(:language, :probability, :reliable?, :proportion, :byte_ranges)
65
73
 
66
74
  # The arguments are two Numeric objects.
67
- def initialize(minNumBytes = MIN_NUM_BYTES_TO_CONSIDER, maxNumBytes = MAX_NUM_BYTES_TO_CONSIDER)
68
- @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(minNumBytes, maxNumBytes))
75
+ def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
76
+ @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(min_num_bytes, max_num_bytes))
69
77
  end
70
78
 
71
79
  # Finds the most likely language for the given text, along with additional
72
80
  # information (e.g., probability). The prediction is based on the first N
73
81
  # bytes where N is the minimum between the number of interchange valid UTF8
74
82
  # bytes and +max_num_bytes_+. If N is less than +min_num_bytes_+ long, then
75
- # this function returns nil as language.
83
+ # this function returns nil.
76
84
  # The argument is a String object.
77
85
  # The returned value of this function is an instance of Result, or nil.
78
86
  def find_language(text)
79
87
  text_utf8 = text.encode(Encoding::UTF_8)
80
88
  pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
81
- pointer.put_bytes(0, text_utf8)
82
89
 
83
- cc_result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
84
- language = cc_result[:language_data].read_bytes(cc_result[:language_size])
90
+ begin
91
+ pointer.put_bytes(0, text_utf8)
92
+
93
+ result = Unstable.NNetLanguageIdentifier_find_language(@cc, pointer, text_utf8.bytesize)
94
+ begin
95
+ convert_result Unstable::NNetLanguageIdentifier::Result.new(result)
96
+ ensure
97
+ Unstable.delete_result result
98
+ end
99
+ ensure
100
+ pointer.free
101
+ end
102
+ end
103
+
104
+ # Splits the input text (up to the first byte, if any, that is not
105
+ # interchange valid UTF8) into spans based on the script, predicts a language
106
+ # for each span, and returns a vector storing the top num_langs most frequent
107
+ # languages along with additional information (e.g., proportions). The number
108
+ # of bytes considered for each span is the minimum between the size of the
109
+ # span and +max_num_bytes_+. If more languages are requested than what is
110
+ # available in the input, then the number of the returned elements will be
111
+ # the number of the latter. Also, if the size of the span is less than
112
+ # +min_num_bytes_+ long, then the span is skipped. If the input text is too
113
+ # long, only the first +MAX_NUM_INPUT_BYTES_TO_CONSIDER+ bytes are processed.
114
+ # The first argument is a String object.
115
+ # The second argument is Numeric object.
116
+ # The returned value of this function is an Array of Result instances.
117
+ def find_top_n_most_freq_langs(text, num_langs)
118
+ text_utf8 = text.encode(Encoding::UTF_8)
119
+ pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
120
+
121
+ begin
122
+ pointer.put_bytes(0, text_utf8)
123
+
124
+ results = Unstable.NNetLanguageIdentifier_find_top_n_most_freq_langs(@cc, pointer, text_utf8.bytesize, num_langs)
125
+ begin
126
+ num_langs.times
127
+ .lazy
128
+ .map { |index| convert_result Unstable.refer_to_nth_result(results, index) }
129
+ .take_while { |result| !result.nil? }
130
+ .to_a
131
+ ensure
132
+ Unstable.delete_results results
133
+ end
134
+ ensure
135
+ pointer.free
136
+ end
137
+ end
138
+
139
+ private
140
+
141
+ def convert_result(result)
142
+ language = result[:language_data].read_bytes(result[:language_size])
143
+ return nil if language == "und"
144
+
145
+ cursor = result[:byte_ranges_data]
146
+ byte_ranges = result[:byte_ranges_size].times.map do
147
+ info = Unstable::NNetLanguageIdentifier::SpanInfo.new(cursor)
148
+ cursor += Unstable::NNetLanguageIdentifier::SpanInfo.size
149
+ SpanInfo.new(info[:start_index], info[:end_index], info[:probability])
150
+ end
85
151
 
86
152
  Result.new(
87
- language == "und" ? nil : language.to_sym,
88
- cc_result[:probability],
89
- cc_result[:reliable?],
90
- cc_result[:proportion])
153
+ language.to_sym,
154
+ result[:probability],
155
+ result[:reliable?],
156
+ result[:proportion],
157
+ byte_ranges)
91
158
  end
92
159
  end
93
160
 
@@ -121,17 +188,30 @@ module CLD3
121
188
  end
122
189
  end
123
190
 
191
+ class SpanInfo < FFI::Struct
192
+ layout :start_index, :int, :end_index, :int, :probability, :float
193
+ end
194
+
124
195
  class Result < FFI::Struct
125
- layout :language_data, :pointer, :language_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
196
+ layout :language_data, :pointer, :language_size, :size_t, :byte_ranges_data, :pointer, :byte_ranges_size, :size_t, :probability, :float, :proportion, :float, :reliable?, :bool
126
197
  end
127
198
  end
128
199
 
129
200
  attach_function :delete_NNetLanguageIdentifier, [ :pointer ], :void
130
201
 
202
+ attach_function :delete_result, [ :pointer ], :void
203
+
204
+ attach_function :delete_results, [ :pointer ], :void
205
+
131
206
  attach_function :new_NNetLanguageIdentifier, [ :int, :int ], :pointer
132
207
 
208
+ attach_function :refer_to_nth_result, [ :pointer, :size_t ], NNetLanguageIdentifier::Result.by_value
209
+
133
210
  attach_function :NNetLanguageIdentifier_find_language,
134
- [ :pointer, :buffer_in, :size_t ], NNetLanguageIdentifier::Result.by_value
211
+ [ :pointer, :buffer_in, :size_t ], :pointer
212
+
213
+ attach_function :NNetLanguageIdentifier_find_top_n_most_freq_langs,
214
+ [ :pointer, :buffer_in, :size_t, :int ], :pointer
135
215
  end
136
216
 
137
217
  private_constant :Unstable
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cld3
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.6
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Akihiko Odaki
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-04 00:00:00.000000000 Z
11
+ date: 2020-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -63,101 +63,70 @@ files:
63
63
  - LICENSE_CLD3
64
64
  - README.md
65
65
  - cld3.gemspec
66
- - ext/cld3/Makefile
67
66
  - ext/cld3/base.cc
68
67
  - ext/cld3/base.h
69
- - ext/cld3/base.o
70
68
  - ext/cld3/casts.h
71
69
  - ext/cld3/embedding_feature_extractor.cc
72
70
  - ext/cld3/embedding_feature_extractor.h
73
- - ext/cld3/embedding_feature_extractor.o
74
71
  - ext/cld3/embedding_network.cc
75
72
  - ext/cld3/embedding_network.h
76
- - ext/cld3/embedding_network.o
77
73
  - ext/cld3/embedding_network_params.h
78
74
  - ext/cld3/extconf.rb
79
75
  - ext/cld3/feature_extractor.cc
80
76
  - ext/cld3/feature_extractor.h
81
- - ext/cld3/feature_extractor.o
82
- - ext/cld3/feature_extractor.pb.o
83
77
  - ext/cld3/feature_extractor.proto
84
78
  - ext/cld3/feature_types.cc
85
79
  - ext/cld3/feature_types.h
86
- - ext/cld3/feature_types.o
87
80
  - ext/cld3/fixunicodevalue.cc
88
81
  - ext/cld3/fixunicodevalue.h
89
- - ext/cld3/fixunicodevalue.o
90
82
  - ext/cld3/float16.h
91
83
  - ext/cld3/fml_parser.cc
92
84
  - ext/cld3/fml_parser.h
93
- - ext/cld3/fml_parser.o
94
85
  - ext/cld3/generated_entities.cc
95
- - ext/cld3/generated_entities.o
96
86
  - ext/cld3/generated_ulscript.cc
97
87
  - ext/cld3/generated_ulscript.h
98
- - ext/cld3/generated_ulscript.o
99
88
  - ext/cld3/getonescriptspan.cc
100
89
  - ext/cld3/getonescriptspan.h
101
- - ext/cld3/getonescriptspan.o
102
90
  - ext/cld3/integral_types.h
103
91
  - ext/cld3/lang_id_nn_params.cc
104
92
  - ext/cld3/lang_id_nn_params.h
105
- - ext/cld3/lang_id_nn_params.o
106
93
  - ext/cld3/language_identifier_features.cc
107
94
  - ext/cld3/language_identifier_features.h
108
- - ext/cld3/language_identifier_features.o
109
- - ext/cld3/libcld3.so
110
- - ext/cld3/mkmf.log
111
95
  - ext/cld3/nnet_language_identifier.cc
112
96
  - ext/cld3/nnet_language_identifier.h
113
- - ext/cld3/nnet_language_identifier.o
114
97
  - ext/cld3/nnet_language_identifier_c.cc
115
- - ext/cld3/nnet_language_identifier_c.o
116
98
  - ext/cld3/offsetmap.cc
117
99
  - ext/cld3/offsetmap.h
118
- - ext/cld3/offsetmap.o
119
100
  - ext/cld3/port.h
120
101
  - ext/cld3/registry.cc
121
102
  - ext/cld3/registry.h
122
- - ext/cld3/registry.o
123
103
  - ext/cld3/relevant_script_feature.cc
124
104
  - ext/cld3/relevant_script_feature.h
125
- - ext/cld3/relevant_script_feature.o
126
105
  - ext/cld3/script_detector.h
127
- - ext/cld3/sentence.pb.o
128
106
  - ext/cld3/sentence.proto
129
107
  - ext/cld3/sentence_features.cc
130
108
  - ext/cld3/sentence_features.h
131
- - ext/cld3/sentence_features.o
132
109
  - ext/cld3/simple_adder.h
133
110
  - ext/cld3/stringpiece.h
134
111
  - ext/cld3/task_context.cc
135
112
  - ext/cld3/task_context.h
136
- - ext/cld3/task_context.o
137
113
  - ext/cld3/task_context_params.cc
138
114
  - ext/cld3/task_context_params.h
139
- - ext/cld3/task_context_params.o
140
- - ext/cld3/task_spec.pb.o
141
115
  - ext/cld3/task_spec.proto
142
116
  - ext/cld3/text_processing.cc
143
117
  - ext/cld3/text_processing.h
144
- - ext/cld3/text_processing.o
145
118
  - ext/cld3/unicodetext.cc
146
119
  - ext/cld3/unicodetext.h
147
- - ext/cld3/unicodetext.o
148
120
  - ext/cld3/utf8acceptinterchange.h
149
121
  - ext/cld3/utf8prop_lettermarkscriptnum.h
150
122
  - ext/cld3/utf8repl_lettermarklower.h
151
123
  - ext/cld3/utf8scannot_lettermarkspecial.h
152
124
  - ext/cld3/utf8statetable.cc
153
125
  - ext/cld3/utf8statetable.h
154
- - ext/cld3/utf8statetable.o
155
126
  - ext/cld3/utils.cc
156
127
  - ext/cld3/utils.h
157
- - ext/cld3/utils.o
158
128
  - ext/cld3/workspace.cc
159
129
  - ext/cld3/workspace.h
160
- - ext/cld3/workspace.o
161
130
  - lib/cld3.rb
162
131
  homepage: https://github.com/akihikodaki/cld3-ruby
163
132
  licenses:
@@ -181,7 +150,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
181
150
  - !ruby/object:Gem::Version
182
151
  version: '0'
183
152
  requirements: []
184
- rubygems_version: 3.0.6
153
+ rubygems_version: 3.1.2
185
154
  signing_key:
186
155
  specification_version: 4
187
156
  summary: Compact Language Detector v3 (CLD3)
@@ -1,266 +0,0 @@
1
-
2
- SHELL = /bin/sh
3
-
4
- # V=0 quiet, V=1 verbose. other values don't work.
5
- V = 0
6
- Q1 = $(V:1=)
7
- Q = $(Q1:0=@)
8
- ECHO1 = $(V:1=@ :)
9
- ECHO = $(ECHO1:0=@ echo)
10
- NULLCMD = :
11
-
12
- #### Start of system configuration section. ####
13
-
14
- srcdir = .
15
- topdir = /usr/include/ruby-2.6.0
16
- hdrdir = $(topdir)
17
- arch_hdrdir = /usr/include/ruby-2.6.0/x86_64-linux
18
- PATH_SEPARATOR = :
19
- VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
20
- prefix = $(DESTDIR)/usr
21
- rubysitearchprefix = $(rubylibprefix)/$(sitearch)
22
- rubyarchprefix = $(rubylibprefix)/$(arch)
23
- rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
24
- exec_prefix = $(prefix)
25
- vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
26
- sitearchhdrdir = $(sitehdrdir)/$(sitearch)
27
- rubyarchhdrdir = $(rubyhdrdir)/$(arch)
28
- vendorhdrdir = $(rubyhdrdir)/vendor_ruby
29
- sitehdrdir = $(rubyhdrdir)/site_ruby
30
- rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
31
- vendorarchdir = $(vendorlibdir)/$(sitearch)
32
- vendorlibdir = $(vendordir)/$(ruby_version)
33
- vendordir = $(rubylibprefix)/vendor_ruby
34
- sitearchdir = $(sitelibdir)/$(sitearch)
35
- sitelibdir = $(sitedir)/$(ruby_version)
36
- sitedir = $(rubylibprefix)/site_ruby
37
- rubyarchdir = $(rubylibdir)/$(arch)
38
- rubylibdir = $(rubylibprefix)/$(ruby_version)
39
- sitearchincludedir = $(includedir)/$(sitearch)
40
- archincludedir = $(includedir)/$(arch)
41
- sitearchlibdir = $(libdir)/$(sitearch)
42
- archlibdir = $(libdir)/$(arch)
43
- ridir = $(datarootdir)/$(RI_BASE_NAME)
44
- mandir = $(datarootdir)/man
45
- localedir = $(datarootdir)/locale
46
- libdir = $(exec_prefix)/lib
47
- psdir = $(docdir)
48
- pdfdir = $(docdir)
49
- dvidir = $(docdir)
50
- htmldir = $(docdir)
51
- infodir = $(datarootdir)/info
52
- docdir = $(datarootdir)/doc/$(PACKAGE)
53
- oldincludedir = $(DESTDIR)/usr/include
54
- includedir = $(prefix)/include
55
- runstatedir = $(localstatedir)/run
56
- localstatedir = $(DESTDIR)/var
57
- sharedstatedir = $(DESTDIR)/var/lib
58
- sysconfdir = $(DESTDIR)/etc
59
- datadir = $(datarootdir)
60
- datarootdir = $(prefix)/share
61
- libexecdir = $(DESTDIR)/usr/lib/ruby
62
- sbindir = $(exec_prefix)/sbin
63
- bindir = $(exec_prefix)/bin
64
- archdir = $(rubyarchdir)
65
-
66
-
67
- CC_WRAPPER =
68
- CC = gcc
69
- CXX = g++
70
- LIBRUBY = $(LIBRUBY_SO)
71
- LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
72
- LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
73
- LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static $(MAINLIBS)
74
- empty =
75
- OUTFLAG = -o $(empty)
76
- COUTFLAG = -o $(empty)
77
- CSRCFLAG = $(empty)
78
-
79
- RUBY_EXTCONF_H =
80
- cflags = $(optflags) $(debugflags) $(warnflags)
81
- cxxflags = $(optflags) $(debugflags) $(warnflags)
82
- optflags = -O3
83
- debugflags = -ggdb3
84
- warnflags = -Wall -Wextra -Wdeclaration-after-statement -Wdeprecated-declarations -Wduplicated-cond -Wimplicit-function-declaration -Wimplicit-int -Wmisleading-indentation -Wpointer-arith -Wrestrict -Wwrite-strings -Wimplicit-fallthrough=0 -Wmissing-noreturn -Wno-cast-function-type -Wno-constant-logical-operand -Wno-long-long -Wno-missing-field-initializers -Wno-overlength-strings -Wno-packed-bitfield-compat -Wno-parentheses-equality -Wno-self-assign -Wno-tautological-compare -Wno-unused-parameter -Wno-unused-value -Wsuggest-attribute=format -Wsuggest-attribute=noreturn -Wunused-variable
85
- cppflags =
86
- CCDLFLAGS = -fPIC
87
- CFLAGS = $(CCDLFLAGS) -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC -pthread $(ARCH_FLAG)
88
- INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
89
- DEFS =
90
- CPPFLAGS = -D_FORTIFY_SOURCE=2 $(DEFS) $(cppflags)
91
- CXXFLAGS = $(CCDLFLAGS) -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -pthread -fvisibility=hidden -std=c++11 $(ARCH_FLAG)
92
- ldflags = -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic
93
- dldflags = -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -Wl,--compress-debug-sections=zlib
94
- ARCH_FLAG =
95
- DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
96
- LDSHARED = $(CC) -shared
97
- LDSHAREDXX = $(CXX) -shared
98
- AR = ar
99
- EXEEXT =
100
-
101
- RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
102
- RUBY_SO_NAME = ruby
103
- RUBYW_INSTALL_NAME =
104
- RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
105
- RUBYW_BASE_NAME = rubyw
106
- RUBY_BASE_NAME = ruby
107
-
108
- arch = x86_64-linux
109
- sitearch = $(arch)
110
- ruby_version = 2.6.0
111
- ruby = $(bindir)/$(RUBY_BASE_NAME)
112
- RUBY = $(ruby)
113
- ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/backward.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
114
-
115
- RM = rm -f
116
- RM_RF = $(RUBY) -run -e rm -- -rf
117
- RMDIRS = rmdir --ignore-fail-on-non-empty -p
118
- MAKEDIRS = /usr/bin/mkdir -p
119
- INSTALL = /usr/bin/install -c
120
- INSTALL_PROG = $(INSTALL) -m 0755
121
- INSTALL_DATA = $(INSTALL) -m 644
122
- COPY = cp
123
- TOUCH = exit >
124
-
125
- #### End of system configuration section. ####
126
-
127
- preload =
128
- libpath = . $(libdir)
129
- LIBPATH = -L. -L$(libdir)
130
- DEFFILE =
131
-
132
- CLEANFILES = mkmf.log
133
- DISTCLEANFILES =
134
- DISTCLEANDIRS =
135
-
136
- extout =
137
- extout_prefix =
138
- target_prefix =
139
- LOCAL_LIBS =
140
- LIBS = $(LIBRUBYARG_SHARED) -lprotobuf -lm -lc
141
- ORIG_SRCS = base.cc embedding_feature_extractor.cc embedding_network.cc feature_extractor.cc feature_extractor.pb.cc feature_types.cc fixunicodevalue.cc fml_parser.cc generated_entities.cc generated_ulscript.cc getonescriptspan.cc lang_id_nn_params.cc language_identifier_features.cc nnet_language_identifier.cc nnet_language_identifier_c.cc offsetmap.cc registry.cc relevant_script_feature.cc sentence.pb.cc sentence_features.cc task_context.cc task_context_params.cc task_spec.pb.cc text_processing.cc unicodetext.cc utf8statetable.cc utils.cc workspace.cc
142
- SRCS = $(ORIG_SRCS)
143
- OBJS = base.o embedding_feature_extractor.o embedding_network.o feature_extractor.o feature_extractor.pb.o feature_types.o fixunicodevalue.o fml_parser.o generated_entities.o generated_ulscript.o getonescriptspan.o lang_id_nn_params.o language_identifier_features.o nnet_language_identifier.o nnet_language_identifier_c.o offsetmap.o registry.o relevant_script_feature.o sentence.pb.o sentence_features.o task_context.o task_context_params.o task_spec.pb.o text_processing.o unicodetext.o utf8statetable.o utils.o workspace.o
144
- HDRS = $(srcdir)/base.h $(srcdir)/casts.h $(srcdir)/embedding_feature_extractor.h $(srcdir)/embedding_network.h $(srcdir)/embedding_network_params.h $(srcdir)/feature_extractor.h $(srcdir)/feature_types.h $(srcdir)/float16.h $(srcdir)/fml_parser.h $(srcdir)/language_identifier_features.h $(srcdir)/lang_id_nn_params.h $(srcdir)/nnet_language_identifier.h $(srcdir)/registry.h $(srcdir)/relevant_script_feature.h $(srcdir)/script_detector.h $(srcdir)/sentence_features.h $(srcdir)/simple_adder.h $(srcdir)/fixunicodevalue.h $(srcdir)/generated_ulscript.h $(srcdir)/getonescriptspan.h $(srcdir)/integral_types.h $(srcdir)/offsetmap.h $(srcdir)/port.h $(srcdir)/stringpiece.h $(srcdir)/text_processing.h $(srcdir)/utf8acceptinterchange.h $(srcdir)/utf8prop_lettermarkscriptnum.h $(srcdir)/utf8repl_lettermarklower.h $(srcdir)/utf8scannot_lettermarkspecial.h $(srcdir)/utf8statetable.h $(srcdir)/task_context.h $(srcdir)/task_context_params.h $(srcdir)/unicodetext.h $(srcdir)/utils.h $(srcdir)/workspace.h $(srcdir)/feature_extractor.pb.h $(srcdir)/sentence.pb.h $(srcdir)/task_spec.pb.h
145
- LOCAL_HDRS =
146
- TARGET = libcld3
147
- TARGET_NAME = libcld3
148
- TARGET_ENTRY = Init_$(TARGET_NAME)
149
- DLLIB = $(TARGET).so
150
- EXTSTATIC =
151
- STATIC_LIB =
152
-
153
- TIMESTAMP_DIR = .
154
- BINDIR = $(bindir)
155
- RUBYCOMMONDIR = $(sitedir)$(target_prefix)
156
- RUBYLIBDIR = $(sitelibdir)$(target_prefix)
157
- RUBYARCHDIR = $(sitearchdir)$(target_prefix)
158
- HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
159
- ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
160
- TARGET_SO_DIR =
161
- TARGET_SO = $(TARGET_SO_DIR)$(DLLIB)
162
- CLEANLIBS = $(TARGET_SO)
163
- CLEANOBJS = *.o *.bak
164
-
165
- all: $(DLLIB)
166
- static: $(STATIC_LIB)
167
- .PHONY: all install static install-so install-rb
168
- .PHONY: clean clean-so clean-static clean-rb
169
-
170
- clean-static::
171
- clean-rb-default::
172
- clean-rb::
173
- clean-so::
174
- clean: clean-so clean-static clean-rb-default clean-rb
175
- -$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
176
-
177
- distclean-rb-default::
178
- distclean-rb::
179
- distclean-so::
180
- distclean-static::
181
- distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
182
- -$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
183
- -$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
184
- -$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
185
-
186
- realclean: distclean
187
- install: install-so install-rb
188
-
189
- install-so: $(DLLIB) $(TIMESTAMP_DIR)/.sitearchdir.time
190
- $(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
191
- clean-static::
192
- -$(Q)$(RM) $(STATIC_LIB)
193
- install-rb: pre-install-rb do-install-rb install-rb-default
194
- install-rb-default: pre-install-rb-default do-install-rb-default
195
- pre-install-rb: Makefile
196
- pre-install-rb-default: Makefile
197
- do-install-rb:
198
- do-install-rb-default:
199
- pre-install-rb-default:
200
- @$(NULLCMD)
201
- $(TIMESTAMP_DIR)/.sitearchdir.time:
202
- $(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
203
- $(Q) $(TOUCH) $@
204
-
205
- site-install: site-install-so site-install-rb
206
- site-install-so: install-so
207
- site-install-rb: install-rb
208
-
209
- .SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
210
-
211
- .cc.o:
212
- $(ECHO) compiling $(<)
213
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
214
-
215
- .cc.S:
216
- $(ECHO) translating $(<)
217
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
218
-
219
- .mm.o:
220
- $(ECHO) compiling $(<)
221
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
222
-
223
- .mm.S:
224
- $(ECHO) translating $(<)
225
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
226
-
227
- .cxx.o:
228
- $(ECHO) compiling $(<)
229
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
230
-
231
- .cxx.S:
232
- $(ECHO) translating $(<)
233
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
234
-
235
- .cpp.o:
236
- $(ECHO) compiling $(<)
237
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
238
-
239
- .cpp.S:
240
- $(ECHO) translating $(<)
241
- $(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
242
-
243
- .c.o:
244
- $(ECHO) compiling $(<)
245
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
246
-
247
- .c.S:
248
- $(ECHO) translating $(<)
249
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
250
-
251
- .m.o:
252
- $(ECHO) compiling $(<)
253
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $(CSRCFLAG)$<
254
-
255
- .m.S:
256
- $(ECHO) translating $(<)
257
- $(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $(CSRCFLAG)$<
258
-
259
- $(TARGET_SO): $(OBJS) Makefile
260
- $(ECHO) linking shared-object $(DLLIB)
261
- -$(Q)$(RM) $(@)
262
- $(Q) $(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
263
-
264
-
265
-
266
- $(OBJS): $(HDRS) $(ruby_headers)
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
@@ -1,36 +0,0 @@
1
- "pkg-config --exists protobuf"
2
- | pkg-config --libs protobuf
3
- => "-lprotobuf \n"
4
- "gcc -o conftest -I/usr/include/ruby-2.6.0/x86_64-linux -I/usr/include/ruby-2.6.0/ruby/backward -I/usr/include/ruby-2.6.0 -I. -D_FORTIFY_SOURCE=2 -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC conftest.c -L. -L/usr/lib -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lm -lc"
5
- checked program was:
6
- /* begin */
7
- 1: #include "ruby.h"
8
- 2:
9
- 3: int main(int argc, char **argv)
10
- 4: {
11
- 5: return 0;
12
- 6: }
13
- /* end */
14
-
15
- "gcc -o conftest -I/usr/include/ruby-2.6.0/x86_64-linux -I/usr/include/ruby-2.6.0/ruby/backward -I/usr/include/ruby-2.6.0 -I. -D_FORTIFY_SOURCE=2 -march=x86-64 -mtune=generic -O2 -pipe -fno-plt -fPIC conftest.c -L. -L/usr/lib -L. -Wl,-O1,--sort-common,--as-needed,-z,relro,-z,now -fstack-protector-strong -rdynamic -Wl,-export-dynamic -lruby -lprotobuf -lm -lc"
16
- checked program was:
17
- /* begin */
18
- 1: #include "ruby.h"
19
- 2:
20
- 3: int main(int argc, char **argv)
21
- 4: {
22
- 5: return 0;
23
- 6: }
24
- /* end */
25
-
26
- | pkg-config --cflags-only-I protobuf
27
- => "\n"
28
- | pkg-config --cflags-only-other protobuf
29
- => "-pthread \n"
30
- | pkg-config --libs-only-l protobuf
31
- => "-lprotobuf \n"
32
- package configuration for protobuf
33
- cflags: -pthread
34
- ldflags:
35
- libs: -lprotobuf
36
-
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file