cld 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. data/.gitignore +20 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +6 -0
  4. data/README.md +34 -0
  5. data/Rakefile +4 -14
  6. data/cld.gemspec +16 -27
  7. data/ext/cld/Makefile +31 -0
  8. data/{base → ext/cld/base}/basictypes.h +0 -0
  9. data/{base → ext/cld/base}/build_config.h +0 -0
  10. data/{base → ext/cld/base}/casts.h +0 -0
  11. data/{base → ext/cld/base}/commandlineflags.h +0 -0
  12. data/{base → ext/cld/base}/crash.h +0 -0
  13. data/{base → ext/cld/base}/dynamic_annotations.h +0 -0
  14. data/{base → ext/cld/base}/global_strip_options.h +0 -0
  15. data/{base → ext/cld/base}/log_severity.h +0 -0
  16. data/{base → ext/cld/base}/logging.h +0 -0
  17. data/{base → ext/cld/base}/macros.h +0 -0
  18. data/{base → ext/cld/base}/port.h +0 -0
  19. data/{base → ext/cld/base}/scoped_ptr.h +0 -0
  20. data/{base → ext/cld/base}/stl_decl.h +0 -0
  21. data/{base → ext/cld/base}/stl_decl_msvc.h +0 -0
  22. data/{base → ext/cld/base}/string_util.h +0 -0
  23. data/{base → ext/cld/base}/strtoint.h +0 -0
  24. data/{base → ext/cld/base}/template_util.h +0 -0
  25. data/{base → ext/cld/base}/type_traits.h +0 -0
  26. data/{base → ext/cld/base}/vlog_is_on.h +0 -0
  27. data/{cld_encodings.h → ext/cld/cld_encodings.h} +0 -0
  28. data/{encodings → ext/cld/encodings}/compact_lang_det/#cldutil.cc# +0 -0
  29. data/{encodings → ext/cld/encodings}/compact_lang_det/#cldutil.h# +0 -0
  30. data/{encodings → ext/cld/encodings}/compact_lang_det/#compact_lang_det_impl.h# +0 -0
  31. data/{encodings → ext/cld/encodings}/compact_lang_det/#ext_lang_enc.cc# +0 -0
  32. data/{encodings → ext/cld/encodings}/compact_lang_det/#ext_lang_enc.h# +0 -0
  33. data/{encodings → ext/cld/encodings}/compact_lang_det/#getonescriptspan.cc# +0 -0
  34. data/{encodings → ext/cld/encodings}/compact_lang_det/#getonescriptspan.h# +0 -0
  35. data/{encodings → ext/cld/encodings}/compact_lang_det/#tote.cc# +0 -0
  36. data/{encodings → ext/cld/encodings}/compact_lang_det/#tote.h# +0 -0
  37. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil.cc +0 -0
  38. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil.h +0 -0
  39. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil_dbg.h +0 -0
  40. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil_dbg_empty.cc +0 -0
  41. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det.cc +0 -0
  42. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det.h +0 -0
  43. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_impl.cc +0 -0
  44. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_impl.h +0 -0
  45. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_unittest_small.cc +0 -0
  46. data/{encodings → ext/cld/encodings}/compact_lang_det/compile.cmd +0 -0
  47. data/{encodings → ext/cld/encodings}/compact_lang_det/ext_lang_enc.cc +0 -0
  48. data/{encodings → ext/cld/encodings}/compact_lang_det/ext_lang_enc.h +0 -0
  49. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +0 -0
  50. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +0 -0
  51. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +0 -0
  52. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +0 -0
  53. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +0 -0
  54. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +0 -0
  55. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +0 -0
  56. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +0 -0
  57. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +0 -0
  58. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +0 -0
  59. data/{encodings → ext/cld/encodings}/compact_lang_det/getonescriptspan.cc +0 -0
  60. data/{encodings → ext/cld/encodings}/compact_lang_det/getonescriptspan.h +0 -0
  61. data/{encodings → ext/cld/encodings}/compact_lang_det/letterscript_enum.cc +0 -0
  62. data/{encodings → ext/cld/encodings}/compact_lang_det/letterscript_enum.h +0 -0
  63. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence.cc +0 -0
  64. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence.h +0 -0
  65. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence_unittest.cc +0 -0
  66. data/{encodings → ext/cld/encodings}/compact_lang_det/tote.cc +0 -0
  67. data/{encodings → ext/cld/encodings}/compact_lang_det/tote.h +0 -0
  68. data/{encodings → ext/cld/encodings}/compact_lang_det/unittest_data.h +0 -0
  69. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8propjustletter.h +0 -0
  70. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8propletterscriptnum.h +0 -0
  71. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8scannotjustletterspecial.h +0 -0
  72. data/{encodings → ext/cld/encodings}/compact_lang_det/win/#cld_unilib_windows.cc# +0 -0
  73. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_basictypes.h +0 -0
  74. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_commandlineflags.h +0 -0
  75. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_google.h +0 -0
  76. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils.h +0 -0
  77. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils_google3.cc +0 -0
  78. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils_windows.cc +0 -0
  79. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_logging.h +0 -0
  80. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_macros.h +0 -0
  81. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_strtoint.h +0 -0
  82. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unicodetext.cc +0 -0
  83. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unicodetext.h +0 -0
  84. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib.h +0 -0
  85. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib_google3.cc +0 -0
  86. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib_windows.cc +0 -0
  87. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf.h +0 -0
  88. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8statetable.cc +0 -0
  89. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8statetable.h +0 -0
  90. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils.h +0 -0
  91. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils_google3.cc +0 -0
  92. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils_windows.cc +0 -0
  93. data/{encodings → ext/cld/encodings}/compact_lang_det/win/normalizedunicodetext.cc +0 -0
  94. data/{encodings → ext/cld/encodings}/compact_lang_det/win/normalizedunicodetext.h +0 -0
  95. data/{encodings → ext/cld/encodings}/internal/encodings.cc +0 -0
  96. data/{encodings → ext/cld/encodings}/lang_enc.h +0 -0
  97. data/{encodings → ext/cld/encodings}/proto/encodings.pb.h +0 -0
  98. data/{encodings → ext/cld/encodings}/public/encodings.h +0 -0
  99. data/ext/cld/extconf.rb +2 -7
  100. data/{languages → ext/cld/languages}/internal/#languages.cc# +0 -0
  101. data/{languages → ext/cld/languages}/internal/languages.cc +0 -0
  102. data/{languages → ext/cld/languages}/proto/languages.pb.h +0 -0
  103. data/{languages → ext/cld/languages}/public/languages.h +0 -0
  104. data/ext/cld/thunk.cc +56 -0
  105. data/lib/cld.rb +15 -6
  106. data/lib/cld/version.rb +3 -0
  107. data/spec/cld_spec.rb +44 -0
  108. data/spec/spec_helper.rb +6 -0
  109. metadata +132 -125
  110. data/Manifest +0 -105
  111. data/README.rdoc +0 -173
  112. data/build.sh +0 -48
  113. data/build.win.cmd +0 -28
  114. data/test/test.rb +0 -570
  115. data/thunk.cc +0 -131
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ ext/cld/*.o
19
+ ext/cld/*.a
20
+ ext/cld/*.so
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cld.gemspec
4
+ gemspec
5
+
6
+ gem "rake"
@@ -0,0 +1,34 @@
1
+ # Compact Language Detection
2
+
3
+ Blazing-fast language detection for Ruby provided by
4
+ Google Chrome's Compact Language Detector.
5
+
6
+ ## How to Use
7
+
8
+ ```ruby
9
+ CLD.detect_language("This is a test")
10
+ # => {:name => "ENGLISH", :code => "en", :reliable => true}
11
+
12
+ CLD.detect_language("plus ça change, plus c'est la même chose")
13
+ # => {:name => "FRENCH", :code => "fr", :reliable => true}
14
+ ```
15
+
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem "cld"
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ ```sh
27
+ $ bundle
28
+ ```
29
+
30
+ ## Thanks
31
+
32
+ Thanks to the Chrome authors, and to Mike McCandless for writing a Python version.
33
+
34
+ Licensed the same as Chrome. Jason Toy
data/Rakefile CHANGED
@@ -1,15 +1,5 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'echoe'
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
4
3
 
5
- Echoe.new('cld', '0.4.0') do |p|
6
- p.description = "Compact Language Detection from chrome"
7
- p.url = "http://github.com/jtoy/cld"
8
- p.author = "Jason Toy"
9
- p.email = "jtoy@jtoy.net"
10
- p.ignore_pattern = ["tmp/*", "script/*"]
11
- p.runtime_dependencies = ["ffi"]
12
- p.development_dependencies = []
13
- end
14
-
15
- #Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
4
+ require "rspec/core/rake_task"
5
+ RSpec::Core::RakeTask.new("spec")
@@ -1,33 +1,22 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/cld/version', __FILE__)
2
3
 
3
- Gem::Specification.new do |s|
4
- s.name = %q{cld}
5
- s.version = "0.4.0"
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Jason Toy"]
6
+ gem.email = ["jtoy@jtoy.net"]
7
+ gem.description = %q{Compact Language Detection for Ruby}
8
+ gem.summary = %q{Compact Language Detection for Ruby}
9
+ gem.homepage = "http://github.com/jtoy/cld"
6
10
 
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = [%q{Jason Toy}]
9
- s.date = %q{2011-11-03}
10
- s.description = %q{Compact Language Detection from chrome}
11
- s.email = %q{jtoy@jtoy.net}
12
- s.extensions = [%q{ext/cld/extconf.rb}]
13
- s.extra_rdoc_files = [%q{LICENSE}, %q{README.rdoc}, %q{ext/cld/extconf.rb}, %q{lib/cld.rb}]
14
- s.files = [%q{LICENSE}, %q{README.rdoc}, %q{Rakefile}, %q{base/basictypes.h}, %q{base/build_config.h}, %q{base/casts.h}, %q{base/commandlineflags.h}, %q{base/crash.h}, %q{base/dynamic_annotations.h}, %q{base/global_strip_options.h}, %q{base/log_severity.h}, %q{base/logging.h}, %q{base/macros.h}, %q{base/port.h}, %q{base/scoped_ptr.h}, %q{base/stl_decl.h}, %q{base/stl_decl_msvc.h}, %q{base/string_util.h}, %q{base/strtoint.h}, %q{base/template_util.h}, %q{base/type_traits.h}, %q{base/vlog_is_on.h}, %q{build.sh}, %q{build.win.cmd}, %q{cld_encodings.h}, %q{encodings/compact_lang_det/#cldutil.cc#}, %q{encodings/compact_lang_det/#cldutil.h#}, %q{encodings/compact_lang_det/#compact_lang_det_impl.h#}, %q{encodings/compact_lang_det/#ext_lang_enc.cc#}, %q{encodings/compact_lang_det/#ext_lang_enc.h#}, %q{encodings/compact_lang_det/#getonescriptspan.cc#}, %q{encodings/compact_lang_det/#getonescriptspan.h#}, %q{encodings/compact_lang_det/#tote.cc#}, %q{encodings/compact_lang_det/#tote.h#}, %q{encodings/compact_lang_det/cldutil.cc}, %q{encodings/compact_lang_det/cldutil.h}, %q{encodings/compact_lang_det/cldutil_dbg.h}, %q{encodings/compact_lang_det/cldutil_dbg_empty.cc}, %q{encodings/compact_lang_det/compact_lang_det.cc}, %q{encodings/compact_lang_det/compact_lang_det.h}, %q{encodings/compact_lang_det/compact_lang_det_impl.cc}, %q{encodings/compact_lang_det/compact_lang_det_impl.h}, %q{encodings/compact_lang_det/compact_lang_det_unittest_small.cc}, %q{encodings/compact_lang_det/compile.cmd}, %q{encodings/compact_lang_det/ext_lang_enc.cc}, %q{encodings/compact_lang_det/ext_lang_enc.h}, %q{encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc}, %q{encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc}, 
%q{encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc}, %q{encodings/compact_lang_det/getonescriptspan.cc}, %q{encodings/compact_lang_det/getonescriptspan.h}, %q{encodings/compact_lang_det/letterscript_enum.cc}, %q{encodings/compact_lang_det/letterscript_enum.h}, %q{encodings/compact_lang_det/subsetsequence.cc}, %q{encodings/compact_lang_det/subsetsequence.h}, %q{encodings/compact_lang_det/subsetsequence_unittest.cc}, %q{encodings/compact_lang_det/tote.cc}, %q{encodings/compact_lang_det/tote.h}, %q{encodings/compact_lang_det/unittest_data.h}, %q{encodings/compact_lang_det/utf8propjustletter.h}, %q{encodings/compact_lang_det/utf8propletterscriptnum.h}, %q{encodings/compact_lang_det/utf8scannotjustletterspecial.h}, %q{encodings/compact_lang_det/win/#cld_unilib_windows.cc#}, %q{encodings/compact_lang_det/win/cld_basictypes.h}, %q{encodings/compact_lang_det/win/cld_commandlineflags.h}, %q{encodings/compact_lang_det/win/cld_google.h}, %q{encodings/compact_lang_det/win/cld_htmlutils.h}, %q{encodings/compact_lang_det/win/cld_htmlutils_google3.cc}, %q{encodings/compact_lang_det/win/cld_htmlutils_windows.cc}, %q{encodings/compact_lang_det/win/cld_logging.h}, %q{encodings/compact_lang_det/win/cld_macros.h}, %q{encodings/compact_lang_det/win/cld_strtoint.h}, %q{encodings/compact_lang_det/win/cld_unicodetext.cc}, %q{encodings/compact_lang_det/win/cld_unicodetext.h}, %q{encodings/compact_lang_det/win/cld_unilib.h}, %q{encodings/compact_lang_det/win/cld_unilib_google3.cc}, %q{encodings/compact_lang_det/win/cld_unilib_windows.cc}, 
%q{encodings/compact_lang_det/win/cld_utf.h}, %q{encodings/compact_lang_det/win/cld_utf8statetable.cc}, %q{encodings/compact_lang_det/win/cld_utf8statetable.h}, %q{encodings/compact_lang_det/win/cld_utf8utils.h}, %q{encodings/compact_lang_det/win/cld_utf8utils_google3.cc}, %q{encodings/compact_lang_det/win/cld_utf8utils_windows.cc}, %q{encodings/compact_lang_det/win/normalizedunicodetext.cc}, %q{encodings/compact_lang_det/win/normalizedunicodetext.h}, %q{encodings/internal/encodings.cc}, %q{encodings/lang_enc.h}, %q{encodings/proto/encodings.pb.h}, %q{encodings/public/encodings.h}, %q{ext/cld/extconf.rb}, %q{languages/internal/#languages.cc#}, %q{languages/internal/languages.cc}, %q{languages/proto/languages.pb.h}, %q{languages/public/languages.h}, %q{lib/cld.rb}, %q{test/test.rb}, %q{thunk.cc}, %q{Manifest}, %q{cld.gemspec}]
15
- s.homepage = %q{http://github.com/jtoy/cld}
16
- s.rdoc_options = [%q{--line-numbers}, %q{--inline-source}, %q{--title}, %q{Cld}, %q{--main}, %q{README.rdoc}]
17
- s.require_paths = [%q{lib}, %q{ext}]
18
- s.rubyforge_project = %q{cld}
19
- s.rubygems_version = %q{1.8.6.1}
20
- s.summary = %q{Compact Language Detection from chrome}
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ gem.files = `git ls-files`.split("\n")
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ gem.extensions = ["ext/cld/extconf.rb"]
15
+ gem.name = "cld"
16
+ gem.require_paths = ["lib"]
17
+ gem.version = CLD::VERSION
21
18
 
22
- if s.respond_to? :specification_version then
23
- s.specification_version = 3
19
+ gem.add_dependency "ffi"
24
20
 
25
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
26
- s.add_runtime_dependency(%q<ffi>, [">= 0"])
27
- else
28
- s.add_dependency(%q<ffi>, [">= 0"])
29
- end
30
- else
31
- s.add_dependency(%q<ffi>, [">= 0"])
32
- end
21
+ gem.add_development_dependency "rspec"
33
22
  end
@@ -0,0 +1,31 @@
1
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
2
+ LDFLAGS=-L.
3
+ CC=g++
4
+ AR=ar
5
+ SOURCES=encodings/compact_lang_det/cldutil.cc \
6
+ encodings/compact_lang_det/cldutil_dbg_empty.cc \
7
+ encodings/compact_lang_det/compact_lang_det.cc \
8
+ encodings/compact_lang_det/compact_lang_det_impl.cc \
9
+ encodings/compact_lang_det/ext_lang_enc.cc \
10
+ encodings/compact_lang_det/getonescriptspan.cc \
11
+ encodings/compact_lang_det/letterscript_enum.cc \
12
+ encodings/compact_lang_det/tote.cc \
13
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
14
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
15
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
16
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
17
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
18
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
19
+ encodings/compact_lang_det/win/cld_unilib_windows.cc \
20
+ encodings/compact_lang_det/win/cld_utf8statetable.cc \
21
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
22
+ encodings/internal/encodings.cc \
23
+ languages/internal/languages.cc \
24
+ thunk.cc
25
+
26
+ install:
27
+ rm -f *.o
28
+ rm -f libcld.a
29
+ $(CC) -c $(CFLAGS) $(SOURCES)
30
+ $(AR) rcs libcld.a *.o
31
+ $(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -1,7 +1,2 @@
1
- require "rake"
2
- home_dir = File.expand_path(File.join(File.dirname(__FILE__), "../../"))
3
- puts home_dir
4
- cmd = "cd #{home_dir}; ./build.sh"
5
- sh cmd
6
- sh "mv #{home_dir}/cld.so #{home_dir}/ext/cld/"
7
- sh "echo 'install:\n\tdate' > #{home_dir}/ext/cld/Makefile"
1
+ #require 'mkmf'
2
+ #create_makefile('cld/cld')
@@ -0,0 +1,56 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include "encodings/compact_lang_det/compact_lang_det.h"
4
+ #include "encodings/compact_lang_det/ext_lang_enc.h"
5
+ #include "encodings/compact_lang_det/unittest_data.h"
6
+ #include "encodings/proto/encodings.pb.h"
7
+
8
+ typedef struct {
9
+ const char *name;
10
+ const char *code;
11
+ bool reliable;
12
+ } RESULT;
13
+
14
+ extern "C" {
15
+ RESULT detectLanguageThunkInt(const char * src) {
16
+ bool is_plain_text = true;
17
+ bool do_allow_extended_languages = true;
18
+ bool do_pick_summary_language = false;
19
+ bool do_remove_weak_matches = false;
20
+ bool is_reliable;
21
+ Language plus_one = UNKNOWN_LANGUAGE;
22
+ const char* tld_hint = NULL;
23
+ int encoding_hint = UNKNOWN_ENCODING;
24
+ Language language_hint = UNKNOWN_LANGUAGE;
25
+
26
+ double normalized_score3[3];
27
+ Language language3[3];
28
+ int percent3[3];
29
+ int text_bytes;
30
+
31
+ Language lang;
32
+ lang = CompactLangDet::DetectLanguage(0,
33
+ src, strlen(src),
34
+ is_plain_text,
35
+ do_allow_extended_languages,
36
+ do_pick_summary_language,
37
+ do_remove_weak_matches,
38
+ tld_hint,
39
+ encoding_hint,
40
+ language_hint,
41
+ language3,
42
+ percent3,
43
+ normalized_score3,
44
+ &text_bytes,
45
+ &is_reliable);
46
+
47
+ RESULT res;
48
+ res.name = LanguageName(lang);
49
+ res.code = ExtLanguageCode(lang);
50
+ res.reliable = is_reliable;
51
+ return res;
52
+ }
53
+ }
54
+
55
+ int main(int argc, char **argv) {
56
+ }
data/lib/cld.rb CHANGED
@@ -1,12 +1,21 @@
1
- require "rubygems"
1
+ require "cld/version"
2
2
  require "ffi"
3
3
 
4
4
  module CLD
5
5
  extend FFI::Library
6
- dir = File.expand_path(File.join(File.dirname(__FILE__), "../ext/cld"))
7
- ffi_lib "#{dir}/cld.so"
8
- attach_function "detect_language","detectLanguageThunkInt", [:buffer_in], :int
9
- def self.english?(text)
10
- detect_language(text) == 0
6
+
7
+ def self.detect_language(text)
8
+ result = detect_language_ext(text)
9
+ Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
11
10
  end
11
+
12
+ private
13
+
14
+ class ReturnValue < FFI::Struct
15
+ layout :name, :string, :code, :string, :reliable, :bool
16
+ end
17
+
18
+ GEM_ROOT = File.expand_path("../../", __FILE__)
19
+ ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
20
+ attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
12
21
  end