RubyGems - cld3 - Versions diffs - 3.4.4 → 3.5.0 - Mend

cld3 3.4.4 → 3.5.0

Files changed (59) hide show

checksums.yaml +4 -4
data/Gemfile +0 -1
data/README.md +4 -7
data/cld3.gemspec +5 -5
data/ext/cld3/Makefile +17 -16
data/ext/cld3/base.o +0 -0
data/ext/cld3/cld_3/protos/feature_extractor.pb.h +100 -0
data/ext/cld3/cld_3/protos/sentence.pb.h +35 -0
data/ext/cld3/cld_3/protos/task_spec.pb.h +106 -0
data/ext/cld3/embedding_feature_extractor.o +0 -0
data/ext/cld3/embedding_network.o +0 -0
data/ext/cld3/extconf.rb +1 -10
data/ext/cld3/feature_extractor.o +0 -0
data/ext/cld3/feature_types.o +0 -0
data/ext/cld3/fixunicodevalue.o +0 -0
data/ext/cld3/fml_parser.o +0 -0
data/ext/cld3/generated_entities.o +0 -0
data/ext/cld3/generated_ulscript.o +0 -0
data/ext/cld3/getonescriptspan.cc +0 -2
data/ext/cld3/getonescriptspan.o +0 -0
data/ext/cld3/lang_id_nn_params.o +0 -0
data/ext/cld3/language_identifier_features.o +0 -0
data/ext/cld3/libcld3.so +0 -0
data/ext/cld3/nnet_language_identifier.o +0 -0
data/ext/cld3/nnet_language_identifier_c.o +0 -0
data/ext/cld3/offsetmap.o +0 -0
data/ext/cld3/registry.o +0 -0
data/ext/cld3/relevant_script_feature.o +0 -0
data/ext/cld3/script_span/fixunicodevalue.h +69 -0
data/ext/cld3/script_span/generated_ulscript.h +142 -0
data/ext/cld3/script_span/getonescriptspan.h +124 -0
data/ext/cld3/script_span/integral_types.h +37 -0
data/ext/cld3/script_span/offsetmap.h +168 -0
data/ext/cld3/script_span/port.h +143 -0
data/ext/cld3/script_span/stringpiece.h +81 -0
data/ext/cld3/script_span/text_processing.h +30 -0
data/ext/cld3/script_span/utf8acceptinterchange.h +486 -0
data/ext/cld3/script_span/utf8prop_lettermarkscriptnum.h +1631 -0
data/ext/cld3/script_span/utf8repl_lettermarklower.h +758 -0
data/ext/cld3/script_span/utf8scannot_lettermarkspecial.h +1455 -0
data/ext/cld3/script_span/utf8statetable.h +285 -0
data/ext/cld3/sentence_features.o +0 -0
data/ext/cld3/task_context.o +0 -0
data/ext/cld3/task_context_params.o +0 -0
data/ext/cld3/text_processing.o +0 -0
data/ext/cld3/unicodetext.o +0 -0
data/ext/cld3/utf8statetable.o +0 -0
data/ext/cld3/utils.o +0 -0
data/ext/cld3/workspace.o +0 -0
data/lib/cld3.rb +4 -1
metadata +33 -25
data/ext/cld3/feature_extractor.pb.o +0 -0
data/ext/cld3/feature_extractor.proto +0 -50
data/ext/cld3/mkmf.log +0 -37
data/ext/cld3/sentence.pb.o +0 -0
data/ext/cld3/sentence.proto +0 -77
data/ext/cld3/task_spec.pb.o +0 -0
data/ext/cld3/task_spec.proto +0 -98
data/lib/a.rb +0 -24

data/ext/cld3/script_span/utf8statetable.h ADDED Viewed

@@ -0,0 +1,285 @@
+// Copyright 2013 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// State Table follower for scanning UTF-8 strings without converting to
+// 32- or 16-bit Unicode values.
+//
+// Author: dsites@google.com (Dick Sites)
+//
+#ifndef SCRIPT_SPAN_UTF8STATETABLE_H_
+#define SCRIPT_SPAN_UTF8STATETABLE_H_
+#include <string>
+#include "integral_types.h" // for uint8, uint32, uint16
+#include "stringpiece.h"
+namespace chrome_lang_id {
+namespace CLD2 {
+class OffsetMap;
+// These four-byte entries compactly encode how many bytes 0..255 to delete
+// in making a string replacement, how many bytes to add 0..255, and the offset
+// 0..64k-1 of the replacement string in remap_string.
+struct RemapEntry {
+  uint8 delete_bytes;
+  uint8 add_bytes;
+  uint16 bytes_offset;
+};
+// Exit type codes for state tables. All but the first get stuffed into
+// signed one-byte entries. The first is only generated by executable code.
+// To distinguish from next-state entries, these must be contiguous and
+// all <= kExitNone
+typedef enum {
+  kExitDstSpaceFull = 239,
+  kExitIllegalStructure,  // 240
+  kExitOK,                // 241
+  kExitReject,            // ...
+  kExitReplace1,
+  kExitReplace2,
+  kExitReplace3,
+  kExitReplace21,
+  kExitReplace31,
+  kExitReplace32,
+  kExitReplaceOffset1,
+  kExitReplaceOffset2,
+  kExitReplace1S0,
+  kExitSpecial,
+  kExitDoAgain,
+  kExitRejectAlt,
+  kExitNone               // 255
+} ExitReason;
+typedef enum {
+  kExitDstSpaceFull_2 = 32767,       // 0x7fff
+  kExitIllegalStructure_2,  // 32768    0x8000
+  kExitOK_2,                // 32769    0x8001
+  kExitReject_2,            // ...
+  kExitReplace1_2,
+  kExitReplace2_2,
+  kExitReplace3_2,
+  kExitReplace21_2,
+  kExitReplace31_2,
+  kExitReplace32_2,
+  kExitReplaceOffset1_2,
+  kExitReplaceOffset2_2,
+  kExitReplace1S0_2,
+  kExitSpecial_2,
+  kExitDoAgain_2,
+  kExitRejectAlt_2,
+  kExitNone_2               // 32783    0x800f
+} ExitReason_2;
+// This struct represents one entire state table. The three initialized byte
+// areas are state_table, remap_base, and remap_string. state0 and state0_size
+// give the byte offset and length within state_table of the initial state --
+// table lookups are expected to start and end in this state, but for
+// truncated UTF-8 strings, may end in a different state. These allow a quick
+// test for that condition. entry_shift is 8 for tables subscripted by a full
+// byte value and 6 for space-optimized tables subscripted by only six
+// significant bits in UTF-8 continuation bytes.
+typedef struct {
+  const uint32 state0;
+  const uint32 state0_size;
+  const uint32 total_size;
+  const int max_expand;
+  const int entry_shift;
+  const int bytes_per_entry;
+  const uint32 losub;
+  const uint32 hiadd;
+  const uint8* state_table;
+  const RemapEntry* remap_base;
+  const uint8* remap_string;
+  const uint8* fast_state;
+} UTF8StateMachineObj;
+// Near-duplicate declaration for tables with two-byte entries
+typedef struct {
+  const uint32 state0;
+  const uint32 state0_size;
+  const uint32 total_size;
+  const int max_expand;
+  const int entry_shift;
+  const int bytes_per_entry;
+  const uint32 losub;
+  const uint32 hiadd;
+  const unsigned short* state_table;
+  const RemapEntry* remap_base;
+  const uint8* remap_string;
+  const uint8* fast_state;
+} UTF8StateMachineObj_2;
+typedef UTF8StateMachineObj UTF8PropObj;
+typedef UTF8StateMachineObj UTF8ScanObj;
+typedef UTF8StateMachineObj UTF8ReplaceObj;
+typedef UTF8StateMachineObj_2 UTF8PropObj_2;
+typedef UTF8StateMachineObj_2 UTF8ReplaceObj_2;
+// NOT IMPLEMENTED typedef UTF8StateMachineObj_2 UTF8ScanObj_2;
+// Look up property of one UTF-8 character and advance over it
+// Return 0 if input length is zero
+// Return 0 and advance one byte if input is ill-formed
+uint8 UTF8GenericProperty(const UTF8PropObj* st,
+                          const uint8** src,
+                          int* srclen);
+// Look up property of one UTF-8 character (assumed to be valid).
+// (This is a faster version of UTF8GenericProperty.)
+bool UTF8HasGenericProperty(const UTF8PropObj& st, const char* src);
+// BigOneByte versions are needed for tables > 240 states, but most
+// won't need the TwoByte versions.
+// Look up property of one UTF-8 character and advance over it
+// Return 0 if input length is zero
+// Return 0 and advance one byte if input is ill-formed
+uint8 UTF8GenericPropertyBigOneByte(const UTF8PropObj* st,
+                          const uint8** src,
+                          int* srclen);
+// Note: the TwoByte versions declared further below are the rare ultimate
+// fallback for tables > 240 states that don't fit into BigOneByte.
+// Look up property of one UTF-8 character (assumed to be valid).
+// (This is a faster version of UTF8GenericPropertyBigOneByte.)
+bool UTF8HasGenericPropertyBigOneByte(const UTF8PropObj& st, const char* src);
+// Look up property of one UTF-8 character and advance over it
+// Return 0 if input length is zero
+// Return 0 and advance one byte if input is ill-formed
+uint8 UTF8GenericPropertyTwoByte(const UTF8PropObj_2* st,
+                          const uint8** src,
+                          int* srclen);
+// Look up property of one UTF-8 character (assumed to be valid).
+// (This is a faster version of UTF8GenericPropertyTwoByte.)
+bool UTF8HasGenericPropertyTwoByte(const UTF8PropObj_2& st, const char* src);
+// Scan a UTF-8 stringpiece based on a state table.
+// Always scan complete UTF-8 characters
+// Set number of bytes scanned. Return reason for exiting
+int UTF8GenericScan(const UTF8ScanObj* st,
+                    const StringPiece& str,
+                    int* bytes_consumed);
+// Scan a UTF-8 stringpiece based on state table, copying to output stringpiece
+//   and doing text replacements.
+// Always scan complete UTF-8 characters
+// Set number of bytes consumed from input, number filled to output.
+// Return reason for exiting
+// Also writes an optional OffsetMap. Pass NULL to skip writing one.
+int UTF8GenericReplace(const UTF8ReplaceObj* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    bool is_plain_text,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed,
+                    OffsetMap* offsetmap);
+// Older version without offsetmap
+int UTF8GenericReplace(const UTF8ReplaceObj* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    bool is_plain_text,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed);
+// Older version without is_plain_text or offsetmap
+int UTF8GenericReplace(const UTF8ReplaceObj* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed);
+// TwoByte version is needed for tables > about 256 states, such
+// as the table for full Unicode 4.1 canonical + compatibility mapping
+// Scan a UTF-8 stringpiece based on state table with two-byte entries,
+//   copying to output stringpiece
+//   and doing text replacements.
+// Always scan complete UTF-8 characters
+// Set number of bytes consumed from input, number filled to output.
+// Return reason for exiting
+// Also writes an optional OffsetMap. Pass NULL to skip writing one.
+int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    bool is_plain_text,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed,
+                    OffsetMap* offsetmap);
+// Older version without offsetmap
+int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    bool is_plain_text,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed);
+// Older version without is_plain_text or offsetmap
+int UTF8GenericReplaceTwoByte(const UTF8ReplaceObj_2* st,
+                    const StringPiece& istr,
+                    StringPiece& ostr,
+                    int* bytes_consumed,
+                    int* bytes_filled,
+                    int* chars_changed);
+static const unsigned char kUTF8LenTbl[256] = {
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
+  2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
+  3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
+};
+inline int UTF8OneCharLen(const char* in) {
+  return kUTF8LenTbl[*reinterpret_cast<const uint8*>(in)];
+}
+// Adjust a stringpiece to encompass complete UTF-8 characters.
+// The data pointer will be increased by 0..3 bytes to get to a character
+// boundary, and the length will then be decreased by 0..3 bytes
+// to encompass the last complete character.
+// This is useful especially when a UTF-8 string must be put into a fixed-
+// maximum-size buffer cleanly, such as a MySQL buffer.
+void UTF8TrimToChars(StringPiece* istr);
+}       // End namespace CLD2
+}       // End namespace chrome_lang_id
+#endif  // SCRIPT_SPAN_UTF8STATETABLE_H_

data/ext/cld3/sentence_features.o CHANGED Viewed

Binary file

data/ext/cld3/task_context.o CHANGED Viewed

Binary file

data/ext/cld3/task_context_params.o CHANGED Viewed

Binary file

data/ext/cld3/text_processing.o CHANGED Viewed

Binary file

data/ext/cld3/unicodetext.o CHANGED Viewed

Binary file

data/ext/cld3/utf8statetable.o CHANGED Viewed

Binary file

data/ext/cld3/utils.o CHANGED Viewed

Binary file

data/ext/cld3/workspace.o CHANGED Viewed

Binary file

data/lib/cld3.rb CHANGED Viewed

@@ -76,7 +76,7 @@ module CLD3
     # The arguments are two Numeric objects.
     def initialize(min_num_bytes = MIN_NUM_BYTES_TO_CONSIDER, max_num_bytes = MAX_NUM_BYTES_TO_CONSIDER)
-      raise ArgumentError if max_num_bytes <= 0 || min_num_bytes < 0 || min_num_bytes >= max_num_bytes
+      raise ArgumentError if min_num_bytes < 0 || min_num_bytes >= max_num_bytes
       @cc = Unstable::NNetLanguageIdentifier::Pointer.new(Unstable.new_NNetLanguageIdentifier(min_num_bytes, max_num_bytes))
     end
@@ -88,6 +88,8 @@ module CLD3
     # The argument is a String object.
     # The returned value of this function is an instance of Result.
     def find_language(text)
+      # @type const FFI: untyped
       text_utf8 = text.encode(Encoding::UTF_8)
       pointer = FFI::MemoryPointer.new(:char, text_utf8.bytesize)
@@ -119,6 +121,7 @@ module CLD3
     # The second argument is a Numeric object.
     # The returned value of this function is an Array of Result instances.
     def find_top_n_most_freq_langs(text, num_langs)
+      # @type const FFI: untyped
       # @type var a: untyped
       text_utf8 = text.encode(Encoding::UTF_8)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cld3
 version: !ruby/object:Gem::Version
-  version: 3.4.4
+  version: 3.5.0
 platform: ruby
 authors:
 - Akihiko Odaki
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-01-20 00:00:00.000000000 Z
+date: 2022-07-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: ffi
@@ -36,60 +36,60 @@ dependencies:
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.7.0
+        version: 2.6.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.8.0
+        version: 2.7.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 1.7.0
+        version: 2.6.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 1.8.0
+        version: 2.7.0
 - !ruby/object:Gem::Dependency
   name: rspec
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 3.0.0
+        version: 3.11.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 3.11.0
+        version: 3.12.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 3.0.0
+        version: 3.11.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 3.11.0
+        version: 3.12.0
 - !ruby/object:Gem::Dependency
   name: steep
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.47.0
+        version: 1.0.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 0.48.0
+        version: 1.1.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - ">="
       - !ruby/object:Gem::Version
-        version: 0.47.0
+        version: 1.0.0
     - - "<"
       - !ruby/object:Gem::Version
-        version: 0.48.0
+        version: 1.1.0
 description: Compact Language Detector v3 (CLD3) is a neural network model for language
   identification.
 email: akihiko.odaki@gmail.com
@@ -108,6 +108,9 @@ files:
 - ext/cld3/base.h
 - ext/cld3/base.o
 - ext/cld3/casts.h
+- ext/cld3/cld_3/protos/feature_extractor.pb.h
+- ext/cld3/cld_3/protos/sentence.pb.h
+- ext/cld3/cld_3/protos/task_spec.pb.h
 - ext/cld3/embedding_feature_extractor.cc
 - ext/cld3/embedding_feature_extractor.h
 - ext/cld3/embedding_feature_extractor.o
@@ -119,8 +122,6 @@ files:
 - ext/cld3/feature_extractor.cc
 - ext/cld3/feature_extractor.h
 - ext/cld3/feature_extractor.o
-- ext/cld3/feature_extractor.pb.o
-- ext/cld3/feature_extractor.proto
 - ext/cld3/feature_types.cc
 - ext/cld3/feature_types.h
 - ext/cld3/feature_types.o
@@ -148,7 +149,6 @@ files:
 - ext/cld3/language_identifier_features.o
 - ext/cld3/libcld3.def
 - ext/cld3/libcld3.so
-- ext/cld3/mkmf.log
 - ext/cld3/nnet_language_identifier.cc
 - ext/cld3/nnet_language_identifier.h
 - ext/cld3/nnet_language_identifier.o
@@ -165,8 +165,19 @@ files:
 - ext/cld3/relevant_script_feature.h
 - ext/cld3/relevant_script_feature.o
 - ext/cld3/script_detector.h
-- ext/cld3/sentence.pb.o
-- ext/cld3/sentence.proto
+- ext/cld3/script_span/fixunicodevalue.h
+- ext/cld3/script_span/generated_ulscript.h
+- ext/cld3/script_span/getonescriptspan.h
+- ext/cld3/script_span/integral_types.h
+- ext/cld3/script_span/offsetmap.h
+- ext/cld3/script_span/port.h
+- ext/cld3/script_span/stringpiece.h
+- ext/cld3/script_span/text_processing.h
+- ext/cld3/script_span/utf8acceptinterchange.h
+- ext/cld3/script_span/utf8prop_lettermarkscriptnum.h
+- ext/cld3/script_span/utf8repl_lettermarklower.h
+- ext/cld3/script_span/utf8scannot_lettermarkspecial.h
+- ext/cld3/script_span/utf8statetable.h
 - ext/cld3/sentence_features.cc
 - ext/cld3/sentence_features.h
 - ext/cld3/sentence_features.o
@@ -178,8 +189,6 @@ files:
 - ext/cld3/task_context_params.cc
 - ext/cld3/task_context_params.h
 - ext/cld3/task_context_params.o
-- ext/cld3/task_spec.pb.o
-- ext/cld3/task_spec.proto
 - ext/cld3/text_processing.cc
 - ext/cld3/text_processing.h
 - ext/cld3/text_processing.o
@@ -199,7 +208,6 @@ files:
 - ext/cld3/workspace.cc
 - ext/cld3/workspace.h
 - ext/cld3/workspace.o
-- lib/a.rb
 - lib/cld3.rb
 - lib/cld3/unstable.rb
 - sig/cld3.rbs
@@ -215,17 +223,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.6.0
+      version: 2.7.0
   - - "<"
     - !ruby/object:Gem::Version
-      version: 3.2.0
+      version: 3.3.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.22
+rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Compact Language Detector v3 (CLD3)

data/ext/cld3/feature_extractor.pb.o DELETED Viewed

Binary file

data/ext/cld3/feature_extractor.proto DELETED Viewed

@@ -1,50 +0,0 @@
-/* Copyright 2016 Google Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Protocol buffers for feature extractor.
-syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
-package chrome_lang_id;
-message Parameter {
-  optional string name = 1;
-  optional string value = 2;
-}
-// Descriptor for feature function.
-message FeatureFunctionDescriptor {
-  // Feature function type.
-  required string type = 1;
-  // Feature function name.
-  optional string name = 2;
-  // Default argument for feature function.
-  optional int32 argument = 3 [default = 0];
-  // Named parameters for feature descriptor.
-  repeated Parameter parameter = 4;
-  // Nested sub-feature function descriptors.
-  repeated FeatureFunctionDescriptor feature = 7;
-};
-// Descriptor for feature extractor.
-message FeatureExtractorDescriptor {
-  // Top-level feature function for extractor.
-  repeated FeatureFunctionDescriptor feature = 1;
-};

data/ext/cld3/mkmf.log DELETED Viewed

@@ -1,37 +0,0 @@
-"pkg-config --exists protobuf"
-| pkg-config --libs protobuf
-=> "-lprotobuf -lpthread \n"
-"gcc -o conftest -I/usr/include -I/usr/include/ruby/backward -I/usr/include -I.    -O2  -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1  -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC conftest.c  -L. -L/usr/lib64 -L. -Wl,-z,relro -Wl,--as-needed  -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld  -fstack-protector-strong -rdynamic -Wl,-export-dynamic     -lruby  -lm   -lc"
-checked program was:
-/* begin */
-1: #include "ruby.h"
-2:
-3: int main(int argc, char **argv)
-4: {
-5:   return !!argv[argc];
-6: }
-/* end */
-"gcc -o conftest -I/usr/include -I/usr/include/ruby/backward -I/usr/include -I.    -O2  -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 -fstack-protector-strong -specs=/usr/lib/rpm/redhat/redhat-annobin-cc1  -mbranch-protection=standard -fasynchronous-unwind-tables -fstack-clash-protection -fPIC conftest.c  -L. -L/usr/lib64 -L. -Wl,-z,relro -Wl,--as-needed  -Wl,-z,now -specs=/usr/lib/rpm/redhat/redhat-hardened-ld  -fstack-protector-strong -rdynamic -Wl,-export-dynamic     -lruby -lprotobuf -lpthread -lm   -lc"
-checked program was:
-/* begin */
-1: #include "ruby.h"
-2:
-3: int main(int argc, char **argv)
-4: {
-5:   return !!argv[argc];
-6: }
-/* end */
-| pkg-config --cflags-only-I protobuf
-=> "\n"
-| pkg-config --cflags-only-other protobuf
-=> "\n"
-| pkg-config --libs-only-l protobuf
-=> "-lprotobuf -lpthread \n"
-package configuration for protobuf
-incflags:
-cflags:
-ldflags:
-libs: -lprotobuf -lpthread

data/ext/cld3/sentence.pb.o DELETED Viewed

Binary file

data/ext/cld3/sentence.proto DELETED Viewed

@@ -1,77 +0,0 @@
-/* Copyright 2016 Google Inc. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-// Protocol buffer specification for sentence analysis.
-syntax = "proto2";
-option optimize_for = LITE_RUNTIME;
-package chrome_lang_id;
-// A Sentence contains the raw text contents of a sentence, as well as an
-// analysis.
-message Sentence {
-  // Identifier for sentence.
-  optional string id = 1;
-  // Raw text contents of the sentence.
-  optional string text = 2;
-  // Tokenization of the sentence.
-  repeated Token token = 3;
-  extensions 1000 to max;
-}
-// A sentence token marks a span of bytes in the sentence text as a token
-// or word.
-message Token {
-  // Token word form.
-  required string word = 1;
-  // Start position of token in text.
-  required int32 start = 2;
-  // End position of token in text. Gives index of last byte, not one past
-  // the last byte. If token came from lexer, excludes any trailing HTML tags.
-  required int32 end = 3;
-  // Head of this token in the dependency tree: the id of the token which has an
-  // arc going to this one. If it is the root token of a sentence, then it is
-  // set to -1.
-  optional int32 head = 4 [default = -1];
-  // Part-of-speech tag for token.
-  optional string tag = 5;
-  // Coarse-grained word category for token.
-  optional string category = 6;
-  // Label for dependency relation between this token and its head.
-  optional string label = 7;
-  // Break level for tokens that indicates how it was separated from the
-  // previous token in the text.
-  enum BreakLevel {
-    NO_BREAK = 0;         // No separation between tokens.
-    SPACE_BREAK = 1;      // Tokens separated by space.
-    LINE_BREAK = 2;       // Tokens separated by line break.
-    SENTENCE_BREAK = 3;   // Tokens separated by sentence break.
-  }
-  optional BreakLevel break_level = 8 [default = SPACE_BREAK];
-  extensions 1000 to max;
-}

data/ext/cld3/task_spec.pb.o DELETED Viewed

Binary file