sanscript 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 348f8d72cc3d76ba760a4225a4f784324294474a
4
- data.tar.gz: f3a1215ad14dc3778795dc0f6563345aaa3015fe
3
+ metadata.gz: dd5f98881a344f630109b020f7614bf3c860a919
4
+ data.tar.gz: e3a1353bf994c3aabee9b779cc35e70c98f70bb3
5
5
  SHA512:
6
- metadata.gz: c8eae2315a8d3a68ce1a873585ab752902287a027a6d45961fef77ad0174d3646d7c7c36107a5879b5b8b36c10afef93428b34098b2ae86dfb6026bb4d644e94
7
- data.tar.gz: 1997c7bb6d11f4b139eb5cde903bc17bffb786bcad236351100e55248d57945fbd19e7cf83f409af357c4c36d1dfb43fbb5adc5a0c2997e66622ed1576bb1921
6
+ metadata.gz: 925e8af64d8eeed22da8b35ffa3b569b80da03956b419bc5055c7bfafc6e52f80dffbdf810f7c538ef50ec103df9431c334295f84811f861d5a78d88a30dc2c5
7
+ data.tar.gz: 82d5cba98f7a7f38899df4bdc20a72597882743ed7fc2a484da9c2d96097dea075cbcfa433b3db20a0ef15075a962d0022851c250a80f337e218d4f2411dc31d
data/.gitignore CHANGED
@@ -7,3 +7,6 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ /rust/target
11
+ /rust/Makefile
12
+ /rust/libsanscript.*
data/.rubocop.yml CHANGED
@@ -1,3 +1,4 @@
1
+ require: rubocop-rspec
1
2
  AllCops:
2
3
  TargetRubyVersion: 2.3
3
4
 
data/README.md CHANGED
@@ -1,9 +1,11 @@
1
1
  # Sanscript.rb
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/sanscript.svg)](https://badge.fury.io/rb/sanscript)
4
+ [![Dependency Status](https://gemnasium.com/badges/github.com/ubcsanskrit/sanscript.rb.svg)](https://gemnasium.com/github.com/ubcsanskrit/sanscript.rb)
3
5
  [![Build Status](https://travis-ci.org/ubcsanskrit/sanscript.rb.svg?branch=master)](https://travis-ci.org/ubcsanskrit/sanscript.rb)
4
- [![Code Climate](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/gpa.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb)
5
6
  [![Test Coverage](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/coverage.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/coverage)
6
- [![Dependency Status](https://gemnasium.com/badges/github.com/ubcsanskrit/sanscript.rb.svg)](https://gemnasium.com/github.com/ubcsanskrit/sanscript.rb)
7
+ [![Code Climate](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/gpa.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb)
8
+ [![Inline docs](http://inch-ci.org/github/ubcsanskrit/sanscript.rb.svg?branch=master)](http://inch-ci.org/github/ubcsanskrit/sanscript.rb)
7
9
 
8
10
  This gem is starting off as a mostly-straightforward port of [learnsanskrit.org's Sanscript.js](https://github.com/sanskrit/sanscript.js), and will go from there. It also incorporates transliteration scheme detection based on [learnsanskrit.org's Detect.js](https://github.com/sanskrit/detect.js).
9
11
 
@@ -33,7 +35,7 @@ Documentation is provided in YARD format and available online at [rubydoc.info](
33
35
 
34
36
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
35
37
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
38
+ To install this gem onto your local machine, run `bundle exec rake install`.
37
39
 
38
40
  ## Contributing
39
41
 
data/Rakefile CHANGED
@@ -5,3 +5,9 @@ require "rspec/core/rake_task"
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
 
7
7
  task default: :spec
8
+
9
# Builds the Rust extension: loads rust/extconf.rb to generate a Makefile and,
# when one exists afterwards, runs its clean, build and install targets.
task :compile do
  # The block form of Dir.chdir restores the previous working directory when
  # the block exits, so tasks that run after :compile are not left in rust/.
  Dir.chdir("#{File.dirname(__FILE__)}/rust") do
    # Resolved relative to the current directory, which is rust/ here.
    require "./extconf.rb"
    sh "make clean && make && make install" if File.exist?("Makefile")
  end
end
@@ -49,8 +49,6 @@ module Sanscript
49
49
  :RE_KOLKATA_ONLY, :RE_ITRANS_ONLY, :RE_SLP1_ONLY, :RE_VELTHUIS_ONLY,
50
50
  :RE_ITRANS_OR_VELTHUIS_ONLY, :RE_HARVARD_KYOTO, :RE_CONTROL_BLOCK
51
51
 
52
- module_function
53
-
54
52
  # @!method detect_scheme(text)
55
53
  # Attempts to detect the encoding scheme of the provided string.
56
54
  #
@@ -70,5 +68,23 @@ module Sanscript
70
68
  extend Ruby2x
71
69
  end
72
70
  # :nocov:
71
+
72
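+ # Rust FFI: bind the compiled detector through the ffi gem; if loading raises LoadError, detect_scheme falls back to ruby_detect_scheme.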
73
+ class << self
74
+ begin
75
+ require "ffi"
76
+ extend FFI::Library
77
+ ffi_lib Dir.glob(File.join(GEM_ROOT, "rust/libsanscript.*")).first
78
+ attach_function :_rust_detect, :detect, [:string], :int
79
+ RUST_SCHEMES = %i[devanagari bengali gurmukhi gujarati oriya tamil telugu kannada malayalam iast kolkata itrans slp1 velthuis hk].unshift(nil).freeze
80
+ private_constant :RUST_SCHEMES
81
+ def rust_detect_scheme(text)
82
+ RUST_SCHEMES[_rust_detect(text)]
83
+ end
84
+ alias detect_scheme rust_detect_scheme
85
+ rescue LoadError
86
+ alias detect_scheme ruby_detect_scheme
87
+ end
88
+ end
73
89
  end
74
90
  end
@@ -9,7 +9,7 @@ module Sanscript
9
9
  #
10
10
  # @param text [String] a string of Sanskrit text
11
11
  # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
12
- def detect_scheme(text)
12
+ def ruby_detect_scheme(text)
13
13
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
14
14
 
15
15
  # Brahmic schemes are all within a specific range of code points.
@@ -8,7 +8,7 @@ module Sanscript
8
8
  #
9
9
  # @param text [String] a string of Sanskrit text
10
10
  # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
11
- def detect_scheme(text)
11
+ def ruby_detect_scheme(text)
12
12
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
13
13
 
14
14
  # rubocop:disable Style/CaseEquality
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
3
  # The version number
4
- VERSION = "0.5.0"
4
+ VERSION = "0.6.0"
5
+
6
+ GEM_ROOT = Pathname.new(File.realpath(File.join(__dir__, "..", "..")))
7
+ private_constant :GEM_ROOT
5
8
  end
data/rust/Cargo.lock ADDED
@@ -0,0 +1,93 @@
1
+ [root]
2
+ name = "sanscript"
3
+ version = "0.1.0"
4
+ dependencies = [
5
+ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
6
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
7
+ "regex 0.1.73 (registry+https://github.com/rust-lang/crates.io-index)",
8
+ ]
9
+
10
+ [[package]]
11
+ name = "aho-corasick"
12
+ version = "0.5.2"
13
+ source = "registry+https://github.com/rust-lang/crates.io-index"
14
+ dependencies = [
15
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
16
+ ]
17
+
18
+ [[package]]
19
+ name = "kernel32-sys"
20
+ version = "0.2.2"
21
+ source = "registry+https://github.com/rust-lang/crates.io-index"
22
+ dependencies = [
23
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
24
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
25
+ ]
26
+
27
+ [[package]]
28
+ name = "lazy_static"
29
+ version = "0.2.1"
30
+ source = "registry+https://github.com/rust-lang/crates.io-index"
31
+
32
+ [[package]]
33
+ name = "libc"
34
+ version = "0.2.15"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+
37
+ [[package]]
38
+ name = "memchr"
39
+ version = "0.1.11"
40
+ source = "registry+https://github.com/rust-lang/crates.io-index"
41
+ dependencies = [
42
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
43
+ ]
44
+
45
+ [[package]]
46
+ name = "regex"
47
+ version = "0.1.73"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ dependencies = [
50
+ "aho-corasick 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
51
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
52
+ "regex-syntax 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
53
+ "thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
54
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
55
+ ]
56
+
57
+ [[package]]
58
+ name = "regex-syntax"
59
+ version = "0.3.4"
60
+ source = "registry+https://github.com/rust-lang/crates.io-index"
61
+
62
+ [[package]]
63
+ name = "thread-id"
64
+ version = "2.0.0"
65
+ source = "registry+https://github.com/rust-lang/crates.io-index"
66
+ dependencies = [
67
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
68
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
69
+ ]
70
+
71
+ [[package]]
72
+ name = "thread_local"
73
+ version = "0.2.6"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ dependencies = [
76
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
77
+ ]
78
+
79
+ [[package]]
80
+ name = "utf8-ranges"
81
+ version = "0.1.3"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+
84
+ [[package]]
85
+ name = "winapi"
86
+ version = "0.2.8"
87
+ source = "registry+https://github.com/rust-lang/crates.io-index"
88
+
89
+ [[package]]
90
+ name = "winapi-build"
91
+ version = "0.1.1"
92
+ source = "registry+https://github.com/rust-lang/crates.io-index"
93
+
data/rust/Cargo.toml ADDED
@@ -0,0 +1,12 @@
1
[package]
name = "sanscript"
version = "0.1.0"
authors = ["Tim Bellefleur <nomoon@phoebus.ca>"]

# Requirements are limited to the release series recorded in Cargo.lock
# (libc 0.2.15, lazy_static 0.2.1, regex 0.1.73) instead of "*", so a build
# without the lock file cannot pick up an incompatible major version.
[dependencies]
libc = "0.2"
lazy_static = "0.2"
regex = "0.1"

[lib]
crate-type = ["dylib"]
data/rust/extconf.rb ADDED
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true
require "mkmf"

# Makefile written when the Rust toolchain is available: `all` builds the
# release library with cargo, `install` moves it next to this file.
rust_makefile = "all:\n\tcargo build --release\n\nclean:\n\trm -rf target\n\ninstall:\n\tmv target/release/libsanscript.* .\n\trm -rf target\n"

# Makefile with empty targets, written when cargo or rustc is missing.
# RubyGems runs make after extconf.rb, so a Makefile has to exist even when
# nothing can be built; without it the gem could not be installed at all.
noop_makefile = "all:\n\nclean:\n\ninstall:\n"

# system returns nil/false when the command is missing or fails.
if system("cargo --version") && system("rustc --version")
  create_makefile("sanscript")
  # Replace the mkmf-generated Makefile with the cargo-driven one.
  File.write("Makefile", rust_makefile)
else
  File.write("Makefile", noop_makefile)
end
data/rust/src/lib.rs ADDED
@@ -0,0 +1,100 @@
1
+ extern crate libc;
2
+ #[macro_use] extern crate lazy_static;
3
+ extern crate regex;
4
+
5
+ use libc::{c_char, int32_t};
6
+ use std::ffi::CStr;
7
+ use std::str;
8
+ use regex::Regex;
9
+
10
+ #[no_mangle]
11
+ pub extern fn detect(s: *const c_char) -> int32_t {
12
+ let c_str = unsafe {
13
+ assert!(!s.is_null());
14
+
15
+ CStr::from_ptr(s)
16
+ };
17
+
18
+ lazy_static! {
19
+ // # Match escaped control characters
20
+ static ref RE_ESCAPED_CONTROL_CHAR: Regex = Regex::new(r"\\(?:\{#|\##|\#})").unwrap();
21
+
22
+ // # Match ##...## or {#...#} control blocks.
23
+ static ref RE_CONTROL_BLOCK: Regex = Regex::new(r"##.*?##|\{#.*?#\}").unwrap();
24
+
25
+ // Match any character in the block of Brahmic scripts
26
+ // between Devanagari and Malayalam.
27
+ static ref RE_BRAHMIC_RANGE: Regex = Regex::new(r"[\x{0900}-\x{0d7f}]").unwrap();
28
+
29
+ // Match on special Roman characters
30
+ static ref RE_IAST_OR_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻ]").unwrap();
31
+
32
+ // Match on Kolkata-specific Roman characters
33
+ static ref RE_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[ēō]").unwrap();
34
+
35
+ // Match on ITRANS-only
36
+ static ref RE_ITRANS_ONLY: Regex = Regex::new(r"ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a").unwrap();
37
+
38
+ // Match on SLP1-only characters and bigrams
39
+ static ref RE_SLP1_ONLY: Regex = Regex::new(r"[fFxXEOCYwWqQPB]|kz|N[kg]|tT|dD|S[cn]|[aAiIuUeo]R|G[yr]").unwrap();
40
+
41
+ // Match on Velthuis-only characters
42
+ static ref RE_VELTHUIS_ONLY: Regex = Regex::new(r"\.[mhnrlntds]|\x22n|~s").unwrap();
43
+
44
+ // Match on chars shared by ITRANS and Velthuis
45
+ static ref RE_ITRANS_OR_VELTHUIS_ONLY: Regex = Regex::new(r"aa|ii|uu|~n").unwrap();
46
+
47
+ // Match on characters available in Harvard-Kyoto
48
+ static ref RE_HARVARD_KYOTO: Regex = Regex::new(r"[aAiIuUeoRMHkgGcjJTDNtdnpbmyrlvzSsh]").unwrap();
49
+ }
50
+
51
+ let r_replaced_str = &RE_ESCAPED_CONTROL_CHAR.replace_all(c_str.to_str().unwrap(), "");
52
+ let r_str = &RE_CONTROL_BLOCK.replace_all(r_replaced_str, "");
53
+
54
+ // Brahmic schemes are all within a specific range of code points.
55
+ let brahmic_match = RE_BRAHMIC_RANGE.find(r_str);
56
+ if brahmic_match != None {
57
+ let brahmic_match = brahmic_match.unwrap();
58
+ let brahmic_codepoint = r_str.chars().nth(brahmic_match.0).unwrap() as u32;
59
+
60
+ if brahmic_codepoint < 0x0980 {
61
+ return 1;
62
+ } else if brahmic_codepoint < 0x0A00 {
63
+ return 2;
64
+ } else if brahmic_codepoint < 0x0A80 {
65
+ return 3;
66
+ } else if brahmic_codepoint < 0x0B00 {
67
+ return 4;
68
+ } else if brahmic_codepoint < 0x0B80 {
69
+ return 5;
70
+ } else if brahmic_codepoint < 0x0C00 {
71
+ return 6;
72
+ } else if brahmic_codepoint < 0x0C80 {
73
+ return 7;
74
+ } else if brahmic_codepoint < 0x0D00 {
75
+ return 8;
76
+ } else {
77
+ return 9;
78
+ }
79
+ }
80
+
81
+ // Romanizations
82
+ if RE_IAST_OR_KOLKATA_ONLY.is_match(r_str) {
83
+ if RE_KOLKATA_ONLY.is_match(r_str) {
84
+ return 11;
85
+ } else {
86
+ return 10;
87
+ }
88
+ } else if RE_ITRANS_ONLY.is_match(r_str) {
89
+ return 12;
90
+ } else if RE_SLP1_ONLY.is_match(r_str) {
91
+ return 13;
92
+ } else if RE_VELTHUIS_ONLY.is_match(r_str) {
93
+ return 14;
94
+ } else if RE_ITRANS_OR_VELTHUIS_ONLY.is_match(r_str) {
95
+ return 12;
96
+ } else if RE_HARVARD_KYOTO.is_match(r_str) {
97
+ return 15;
98
+ }
99
+ return 0;
100
+ }
data/sanscript.gemspec CHANGED
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.bindir = "exe"
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
20
  spec.require_paths = ["lib"]
21
+ spec.extensions = Dir["rust/extconf.rb"]
21
22
 
22
23
  spec.required_ruby_version = "~> 2.2"
23
24
 
@@ -25,9 +26,12 @@ Gem::Specification.new do |spec|
25
26
  spec.add_development_dependency "rake", "~> 11.2"
26
27
  spec.add_development_dependency "rspec", "~> 3.5"
27
28
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
29
+ spec.add_development_dependency "rubocop", "~> 0.41"
30
+ spec.add_development_dependency "rubocop-rspec", "~> 1.5"
28
31
  spec.add_development_dependency "pry", "~> 0.10"
29
32
  spec.add_development_dependency "benchmark-ips", "~> 2.6"
30
33
  spec.add_development_dependency "yard", "~> 0.9"
31
34
 
32
- spec.add_runtime_dependency "ragabash", "~> 0.1"
35
+ spec.add_runtime_dependency "ragabash", "~> 0.2"
36
+ spec.add_runtime_dependency "ffi"
33
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-23 00:00:00.000000000 Z
11
+ date: 2016-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,34 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.41'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.41'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop-rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.5'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.5'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: pry
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -114,19 +142,34 @@ dependencies:
114
142
  requirements:
115
143
  - - "~>"
116
144
  - !ruby/object:Gem::Version
117
- version: '0.1'
145
+ version: '0.2'
118
146
  type: :runtime
119
147
  prerelease: false
120
148
  version_requirements: !ruby/object:Gem::Requirement
121
149
  requirements:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
- version: '0.1'
152
+ version: '0.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: ffi
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
125
167
  description:
126
168
  email:
127
169
  - nomoon@phoebus.ca
128
170
  executables: []
129
- extensions: []
171
+ extensions:
172
+ - rust/extconf.rb
130
173
  extra_rdoc_files: []
131
174
  files:
132
175
  - ".codeclimate.yml"
@@ -150,6 +193,10 @@ files:
150
193
  - lib/sanscript/transliterate.rb
151
194
  - lib/sanscript/transliterate/schemes.rb
152
195
  - lib/sanscript/version.rb
196
+ - rust/Cargo.lock
197
+ - rust/Cargo.toml
198
+ - rust/extconf.rb
199
+ - rust/src/lib.rs
153
200
  - sanscript.gemspec
154
201
  homepage: https://github.com/ubcsanskrit/sanscript.rb
155
202
  licenses: