cld 0.4.0 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. data/.gitignore +20 -0
  2. data/.rspec +2 -0
  3. data/Gemfile +6 -0
  4. data/README.md +34 -0
  5. data/Rakefile +4 -14
  6. data/cld.gemspec +16 -27
  7. data/ext/cld/Makefile +31 -0
  8. data/{base → ext/cld/base}/basictypes.h +0 -0
  9. data/{base → ext/cld/base}/build_config.h +0 -0
  10. data/{base → ext/cld/base}/casts.h +0 -0
  11. data/{base → ext/cld/base}/commandlineflags.h +0 -0
  12. data/{base → ext/cld/base}/crash.h +0 -0
  13. data/{base → ext/cld/base}/dynamic_annotations.h +0 -0
  14. data/{base → ext/cld/base}/global_strip_options.h +0 -0
  15. data/{base → ext/cld/base}/log_severity.h +0 -0
  16. data/{base → ext/cld/base}/logging.h +0 -0
  17. data/{base → ext/cld/base}/macros.h +0 -0
  18. data/{base → ext/cld/base}/port.h +0 -0
  19. data/{base → ext/cld/base}/scoped_ptr.h +0 -0
  20. data/{base → ext/cld/base}/stl_decl.h +0 -0
  21. data/{base → ext/cld/base}/stl_decl_msvc.h +0 -0
  22. data/{base → ext/cld/base}/string_util.h +0 -0
  23. data/{base → ext/cld/base}/strtoint.h +0 -0
  24. data/{base → ext/cld/base}/template_util.h +0 -0
  25. data/{base → ext/cld/base}/type_traits.h +0 -0
  26. data/{base → ext/cld/base}/vlog_is_on.h +0 -0
  27. data/{cld_encodings.h → ext/cld/cld_encodings.h} +0 -0
  28. data/{encodings → ext/cld/encodings}/compact_lang_det/#cldutil.cc# +0 -0
  29. data/{encodings → ext/cld/encodings}/compact_lang_det/#cldutil.h# +0 -0
  30. data/{encodings → ext/cld/encodings}/compact_lang_det/#compact_lang_det_impl.h# +0 -0
  31. data/{encodings → ext/cld/encodings}/compact_lang_det/#ext_lang_enc.cc# +0 -0
  32. data/{encodings → ext/cld/encodings}/compact_lang_det/#ext_lang_enc.h# +0 -0
  33. data/{encodings → ext/cld/encodings}/compact_lang_det/#getonescriptspan.cc# +0 -0
  34. data/{encodings → ext/cld/encodings}/compact_lang_det/#getonescriptspan.h# +0 -0
  35. data/{encodings → ext/cld/encodings}/compact_lang_det/#tote.cc# +0 -0
  36. data/{encodings → ext/cld/encodings}/compact_lang_det/#tote.h# +0 -0
  37. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil.cc +0 -0
  38. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil.h +0 -0
  39. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil_dbg.h +0 -0
  40. data/{encodings → ext/cld/encodings}/compact_lang_det/cldutil_dbg_empty.cc +0 -0
  41. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det.cc +0 -0
  42. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det.h +0 -0
  43. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_impl.cc +0 -0
  44. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_impl.h +0 -0
  45. data/{encodings → ext/cld/encodings}/compact_lang_det/compact_lang_det_unittest_small.cc +0 -0
  46. data/{encodings → ext/cld/encodings}/compact_lang_det/compile.cmd +0 -0
  47. data/{encodings → ext/cld/encodings}/compact_lang_det/ext_lang_enc.cc +0 -0
  48. data/{encodings → ext/cld/encodings}/compact_lang_det/ext_lang_enc.h +0 -0
  49. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc +0 -0
  50. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc +0 -0
  51. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc +0 -0
  52. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc +0 -0
  53. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc +0 -0
  54. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc +0 -0
  55. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc +0 -0
  56. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_meanscore.h +0 -0
  57. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc +0 -0
  58. data/{encodings → ext/cld/encodings}/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc +0 -0
  59. data/{encodings → ext/cld/encodings}/compact_lang_det/getonescriptspan.cc +0 -0
  60. data/{encodings → ext/cld/encodings}/compact_lang_det/getonescriptspan.h +0 -0
  61. data/{encodings → ext/cld/encodings}/compact_lang_det/letterscript_enum.cc +0 -0
  62. data/{encodings → ext/cld/encodings}/compact_lang_det/letterscript_enum.h +0 -0
  63. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence.cc +0 -0
  64. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence.h +0 -0
  65. data/{encodings → ext/cld/encodings}/compact_lang_det/subsetsequence_unittest.cc +0 -0
  66. data/{encodings → ext/cld/encodings}/compact_lang_det/tote.cc +0 -0
  67. data/{encodings → ext/cld/encodings}/compact_lang_det/tote.h +0 -0
  68. data/{encodings → ext/cld/encodings}/compact_lang_det/unittest_data.h +0 -0
  69. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8propjustletter.h +0 -0
  70. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8propletterscriptnum.h +0 -0
  71. data/{encodings → ext/cld/encodings}/compact_lang_det/utf8scannotjustletterspecial.h +0 -0
  72. data/{encodings → ext/cld/encodings}/compact_lang_det/win/#cld_unilib_windows.cc# +0 -0
  73. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_basictypes.h +0 -0
  74. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_commandlineflags.h +0 -0
  75. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_google.h +0 -0
  76. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils.h +0 -0
  77. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils_google3.cc +0 -0
  78. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_htmlutils_windows.cc +0 -0
  79. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_logging.h +0 -0
  80. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_macros.h +0 -0
  81. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_strtoint.h +0 -0
  82. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unicodetext.cc +0 -0
  83. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unicodetext.h +0 -0
  84. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib.h +0 -0
  85. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib_google3.cc +0 -0
  86. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_unilib_windows.cc +0 -0
  87. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf.h +0 -0
  88. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8statetable.cc +0 -0
  89. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8statetable.h +0 -0
  90. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils.h +0 -0
  91. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils_google3.cc +0 -0
  92. data/{encodings → ext/cld/encodings}/compact_lang_det/win/cld_utf8utils_windows.cc +0 -0
  93. data/{encodings → ext/cld/encodings}/compact_lang_det/win/normalizedunicodetext.cc +0 -0
  94. data/{encodings → ext/cld/encodings}/compact_lang_det/win/normalizedunicodetext.h +0 -0
  95. data/{encodings → ext/cld/encodings}/internal/encodings.cc +0 -0
  96. data/{encodings → ext/cld/encodings}/lang_enc.h +0 -0
  97. data/{encodings → ext/cld/encodings}/proto/encodings.pb.h +0 -0
  98. data/{encodings → ext/cld/encodings}/public/encodings.h +0 -0
  99. data/ext/cld/extconf.rb +2 -7
  100. data/{languages → ext/cld/languages}/internal/#languages.cc# +0 -0
  101. data/{languages → ext/cld/languages}/internal/languages.cc +0 -0
  102. data/{languages → ext/cld/languages}/proto/languages.pb.h +0 -0
  103. data/{languages → ext/cld/languages}/public/languages.h +0 -0
  104. data/ext/cld/thunk.cc +56 -0
  105. data/lib/cld.rb +15 -6
  106. data/lib/cld/version.rb +3 -0
  107. data/spec/cld_spec.rb +44 -0
  108. data/spec/spec_helper.rb +6 -0
  109. metadata +132 -125
  110. data/Manifest +0 -105
  111. data/README.rdoc +0 -173
  112. data/build.sh +0 -48
  113. data/build.win.cmd +0 -28
  114. data/test/test.rb +0 -570
  115. data/thunk.cc +0 -131
data/.gitignore ADDED
@@ -0,0 +1,20 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
18
+ ext/cld/*.o
19
+ ext/cld/*.a
20
+ ext/cld/*.so
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in cld.gemspec
4
+ gemspec
5
+
6
+ gem "rake"
data/README.md ADDED
@@ -0,0 +1,34 @@
1
+ # Compact Language Detection
2
+
3
+ Blazing-fast language detection for Ruby provided by
4
+ Google Chrome's Compact Language Detector.
5
+
6
+ ## How to Use
7
+
8
+ ```ruby
9
+ CLD.detect_language("This is a test")
10
+ # => {:name => "ENGLISH", :code => "en", :reliable => true}
11
+
12
+ CLD.detect_language("plus ça change, plus c'est la même chose")
13
+ # => {:name => "FRENCH", :code => "fr", :reliable => true}
14
+ ```
15
+
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem "cld"
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ ```sh
27
+ $ bundle
28
+ ```
29
+
30
+ ## Thanks
31
+
32
+ Thanks to the Chrome authors, and to Mike McCandless for writing a Python version.
33
+
34
+ Licensed under the same terms as Chrome. Jason Toy
data/Rakefile CHANGED
@@ -1,15 +1,5 @@
1
- require 'rubygems'
2
- require 'rake'
3
- require 'echoe'
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
4
3
 
5
- Echoe.new('cld', '0.4.0') do |p|
6
- p.description = "Compact Language Detection from chrome"
7
- p.url = "http://github.com/jtoy/cld"
8
- p.author = "Jason Toy"
9
- p.email = "jtoy@jtoy.net"
10
- p.ignore_pattern = ["tmp/*", "script/*"]
11
- p.runtime_dependencies = ["ffi"]
12
- p.development_dependencies = []
13
- end
14
-
15
- #Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
4
+ require "rspec/core/rake_task"
5
+ RSpec::Core::RakeTask.new("spec")
data/cld.gemspec CHANGED
@@ -1,33 +1,22 @@
1
1
  # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/cld/version', __FILE__)
2
3
 
3
- Gem::Specification.new do |s|
4
- s.name = %q{cld}
5
- s.version = "0.4.0"
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Jason Toy"]
6
+ gem.email = ["jtoy@jtoy.net"]
7
+ gem.description = %q{Compact Language Detection for Ruby}
8
+ gem.summary = %q{Compact Language Detection for Ruby}
9
+ gem.homepage = "http://github.com/jtoy/cld"
6
10
 
7
- s.required_rubygems_version = Gem::Requirement.new(">= 1.2") if s.respond_to? :required_rubygems_version=
8
- s.authors = [%q{Jason Toy}]
9
- s.date = %q{2011-11-03}
10
- s.description = %q{Compact Language Detection from chrome}
11
- s.email = %q{jtoy@jtoy.net}
12
- s.extensions = [%q{ext/cld/extconf.rb}]
13
- s.extra_rdoc_files = [%q{LICENSE}, %q{README.rdoc}, %q{ext/cld/extconf.rb}, %q{lib/cld.rb}]
14
- s.files = [%q{LICENSE}, %q{README.rdoc}, %q{Rakefile}, %q{base/basictypes.h}, %q{base/build_config.h}, %q{base/casts.h}, %q{base/commandlineflags.h}, %q{base/crash.h}, %q{base/dynamic_annotations.h}, %q{base/global_strip_options.h}, %q{base/log_severity.h}, %q{base/logging.h}, %q{base/macros.h}, %q{base/port.h}, %q{base/scoped_ptr.h}, %q{base/stl_decl.h}, %q{base/stl_decl_msvc.h}, %q{base/string_util.h}, %q{base/strtoint.h}, %q{base/template_util.h}, %q{base/type_traits.h}, %q{base/vlog_is_on.h}, %q{build.sh}, %q{build.win.cmd}, %q{cld_encodings.h}, %q{encodings/compact_lang_det/#cldutil.cc#}, %q{encodings/compact_lang_det/#cldutil.h#}, %q{encodings/compact_lang_det/#compact_lang_det_impl.h#}, %q{encodings/compact_lang_det/#ext_lang_enc.cc#}, %q{encodings/compact_lang_det/#ext_lang_enc.h#}, %q{encodings/compact_lang_det/#getonescriptspan.cc#}, %q{encodings/compact_lang_det/#getonescriptspan.h#}, %q{encodings/compact_lang_det/#tote.cc#}, %q{encodings/compact_lang_det/#tote.h#}, %q{encodings/compact_lang_det/cldutil.cc}, %q{encodings/compact_lang_det/cldutil.h}, %q{encodings/compact_lang_det/cldutil_dbg.h}, %q{encodings/compact_lang_det/cldutil_dbg_empty.cc}, %q{encodings/compact_lang_det/compact_lang_det.cc}, %q{encodings/compact_lang_det/compact_lang_det.h}, %q{encodings/compact_lang_det/compact_lang_det_impl.cc}, %q{encodings/compact_lang_det/compact_lang_det_impl.h}, %q{encodings/compact_lang_det/compact_lang_det_unittest_small.cc}, %q{encodings/compact_lang_det/compile.cmd}, %q{encodings/compact_lang_det/ext_lang_enc.cc}, %q{encodings/compact_lang_det/ext_lang_enc.h}, %q{encodings/compact_lang_det/generated/cld_generated_score_deltaoctachrome_0406.cc}, %q{encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc}, 
%q{encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_quads_0.cc}, %q{encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc}, %q{encodings/compact_lang_det/getonescriptspan.cc}, %q{encodings/compact_lang_det/getonescriptspan.h}, %q{encodings/compact_lang_det/letterscript_enum.cc}, %q{encodings/compact_lang_det/letterscript_enum.h}, %q{encodings/compact_lang_det/subsetsequence.cc}, %q{encodings/compact_lang_det/subsetsequence.h}, %q{encodings/compact_lang_det/subsetsequence_unittest.cc}, %q{encodings/compact_lang_det/tote.cc}, %q{encodings/compact_lang_det/tote.h}, %q{encodings/compact_lang_det/unittest_data.h}, %q{encodings/compact_lang_det/utf8propjustletter.h}, %q{encodings/compact_lang_det/utf8propletterscriptnum.h}, %q{encodings/compact_lang_det/utf8scannotjustletterspecial.h}, %q{encodings/compact_lang_det/win/#cld_unilib_windows.cc#}, %q{encodings/compact_lang_det/win/cld_basictypes.h}, %q{encodings/compact_lang_det/win/cld_commandlineflags.h}, %q{encodings/compact_lang_det/win/cld_google.h}, %q{encodings/compact_lang_det/win/cld_htmlutils.h}, %q{encodings/compact_lang_det/win/cld_htmlutils_google3.cc}, %q{encodings/compact_lang_det/win/cld_htmlutils_windows.cc}, %q{encodings/compact_lang_det/win/cld_logging.h}, %q{encodings/compact_lang_det/win/cld_macros.h}, %q{encodings/compact_lang_det/win/cld_strtoint.h}, %q{encodings/compact_lang_det/win/cld_unicodetext.cc}, %q{encodings/compact_lang_det/win/cld_unicodetext.h}, %q{encodings/compact_lang_det/win/cld_unilib.h}, %q{encodings/compact_lang_det/win/cld_unilib_google3.cc}, %q{encodings/compact_lang_det/win/cld_unilib_windows.cc}, 
%q{encodings/compact_lang_det/win/cld_utf.h}, %q{encodings/compact_lang_det/win/cld_utf8statetable.cc}, %q{encodings/compact_lang_det/win/cld_utf8statetable.h}, %q{encodings/compact_lang_det/win/cld_utf8utils.h}, %q{encodings/compact_lang_det/win/cld_utf8utils_google3.cc}, %q{encodings/compact_lang_det/win/cld_utf8utils_windows.cc}, %q{encodings/compact_lang_det/win/normalizedunicodetext.cc}, %q{encodings/compact_lang_det/win/normalizedunicodetext.h}, %q{encodings/internal/encodings.cc}, %q{encodings/lang_enc.h}, %q{encodings/proto/encodings.pb.h}, %q{encodings/public/encodings.h}, %q{ext/cld/extconf.rb}, %q{languages/internal/#languages.cc#}, %q{languages/internal/languages.cc}, %q{languages/proto/languages.pb.h}, %q{languages/public/languages.h}, %q{lib/cld.rb}, %q{test/test.rb}, %q{thunk.cc}, %q{Manifest}, %q{cld.gemspec}]
15
- s.homepage = %q{http://github.com/jtoy/cld}
16
- s.rdoc_options = [%q{--line-numbers}, %q{--inline-source}, %q{--title}, %q{Cld}, %q{--main}, %q{README.rdoc}]
17
- s.require_paths = [%q{lib}, %q{ext}]
18
- s.rubyforge_project = %q{cld}
19
- s.rubygems_version = %q{1.8.6.1}
20
- s.summary = %q{Compact Language Detection from chrome}
11
+ gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
12
+ gem.files = `git ls-files`.split("\n")
13
+ gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ gem.extensions = ["ext/cld/extconf.rb"]
15
+ gem.name = "cld"
16
+ gem.require_paths = ["lib"]
17
+ gem.version = CLD::VERSION
21
18
 
22
- if s.respond_to? :specification_version then
23
- s.specification_version = 3
19
+ gem.add_dependency "ffi"
24
20
 
25
- if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
26
- s.add_runtime_dependency(%q<ffi>, [">= 0"])
27
- else
28
- s.add_dependency(%q<ffi>, [">= 0"])
29
- end
30
- else
31
- s.add_dependency(%q<ffi>, [">= 0"])
32
- end
21
+ gem.add_development_dependency "rspec"
33
22
  end
data/ext/cld/Makefile ADDED
@@ -0,0 +1,31 @@
1
+ CFLAGS=-fPIC -I. -O2 -DCLD_WINDOWS
2
+ LDFLAGS=-L.
3
+ CC=g++
4
+ AR=ar
5
+ SOURCES=encodings/compact_lang_det/cldutil.cc \
6
+ encodings/compact_lang_det/cldutil_dbg_empty.cc \
7
+ encodings/compact_lang_det/compact_lang_det.cc \
8
+ encodings/compact_lang_det/compact_lang_det_impl.cc \
9
+ encodings/compact_lang_det/ext_lang_enc.cc \
10
+ encodings/compact_lang_det/getonescriptspan.cc \
11
+ encodings/compact_lang_det/letterscript_enum.cc \
12
+ encodings/compact_lang_det/tote.cc \
13
+ encodings/compact_lang_det/generated/cld_generated_score_quadchrome_0406.cc \
14
+ encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc \
15
+ encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc \
16
+ encodings/compact_lang_det/generated/compact_lang_det_generated_deltaoctachrome.cc \
17
+ encodings/compact_lang_det/generated/compact_lang_det_generated_quadschrome.cc \
18
+ encodings/compact_lang_det/win/cld_htmlutils_windows.cc \
19
+ encodings/compact_lang_det/win/cld_unilib_windows.cc \
20
+ encodings/compact_lang_det/win/cld_utf8statetable.cc \
21
+ encodings/compact_lang_det/win/cld_utf8utils_windows.cc \
22
+ encodings/internal/encodings.cc \
23
+ languages/internal/languages.cc \
24
+ thunk.cc
25
+
26
+ install:
27
+ rm -f *.o
28
+ rm -f libcld.a
29
+ $(CC) -c $(CFLAGS) $(SOURCES)
30
+ $(AR) rcs libcld.a *.o
31
+ $(CC) -DCLD_WINDOWS -I. -L. -shared -o cld.so -lstdc++ *.o
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
data/ext/cld/extconf.rb CHANGED
@@ -1,7 +1,2 @@
1
- require "rake"
2
- home_dir = File.expand_path(File.join(File.dirname(__FILE__), "../../"))
3
- puts home_dir
4
- cmd = "cd #{home_dir}; ./build.sh"
5
- sh cmd
6
- sh "mv #{home_dir}/cld.so #{home_dir}/ext/cld/"
7
- sh "echo 'install:\n\tdate' > #{home_dir}/ext/cld/Makefile"
1
+ #require 'mkmf'
2
+ #create_makefile('cld/cld')
data/ext/cld/thunk.cc ADDED
@@ -0,0 +1,56 @@
1
+ #include <stdio.h>
2
+ #include <string.h>
3
+ #include "encodings/compact_lang_det/compact_lang_det.h"
4
+ #include "encodings/compact_lang_det/ext_lang_enc.h"
5
+ #include "encodings/compact_lang_det/unittest_data.h"
6
+ #include "encodings/proto/encodings.pb.h"
7
+
8
+ typedef struct {
9
+ const char *name;
10
+ const char *code;
11
+ bool reliable;
12
+ } RESULT;
13
+
14
+ extern "C" {
15
+ RESULT detectLanguageThunkInt(const char * src) {
16
+ bool is_plain_text = true;
17
+ bool do_allow_extended_languages = true;
18
+ bool do_pick_summary_language = false;
19
+ bool do_remove_weak_matches = false;
20
+ bool is_reliable;
21
+ Language plus_one = UNKNOWN_LANGUAGE;
22
+ const char* tld_hint = NULL;
23
+ int encoding_hint = UNKNOWN_ENCODING;
24
+ Language language_hint = UNKNOWN_LANGUAGE;
25
+
26
+ double normalized_score3[3];
27
+ Language language3[3];
28
+ int percent3[3];
29
+ int text_bytes;
30
+
31
+ Language lang;
32
+ lang = CompactLangDet::DetectLanguage(0,
33
+ src, strlen(src),
34
+ is_plain_text,
35
+ do_allow_extended_languages,
36
+ do_pick_summary_language,
37
+ do_remove_weak_matches,
38
+ tld_hint,
39
+ encoding_hint,
40
+ language_hint,
41
+ language3,
42
+ percent3,
43
+ normalized_score3,
44
+ &text_bytes,
45
+ &is_reliable);
46
+
47
+ RESULT res;
48
+ res.name = LanguageName(lang);
49
+ res.code = ExtLanguageCode(lang);
50
+ res.reliable = is_reliable;
51
+ return res;
52
+ }
53
+ }
54
+
55
+ int main(int argc, char **argv) {
56
+ }
data/lib/cld.rb CHANGED
@@ -1,12 +1,21 @@
1
- require "rubygems"
1
+ require "cld/version"
2
2
  require "ffi"
3
3
 
4
4
  module CLD
5
5
  extend FFI::Library
6
- dir = File.expand_path(File.join(File.dirname(__FILE__), "../ext/cld"))
7
- ffi_lib "#{dir}/cld.so"
8
- attach_function "detect_language","detectLanguageThunkInt", [:buffer_in], :int
9
- def self.english?(text)
10
- detect_language(text) == 0
6
+
7
+ def self.detect_language(text)
8
+ result = detect_language_ext(text)
9
+ Hash[ result.members.map {|member| [member.to_sym, result[member]]} ]
11
10
  end
11
+
12
+ private
13
+
14
+ class ReturnValue < FFI::Struct
15
+ layout :name, :string, :code, :string, :reliable, :bool
16
+ end
17
+
18
+ GEM_ROOT = File.expand_path("../../", __FILE__)
19
+ ffi_lib "#{GEM_ROOT}/ext/cld/cld.so"
20
+ attach_function "detect_language_ext","detectLanguageThunkInt", [:buffer_in], ReturnValue.by_value
12
21
  end