sanscript 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 348f8d72cc3d76ba760a4225a4f784324294474a
4
- data.tar.gz: f3a1215ad14dc3778795dc0f6563345aaa3015fe
3
+ metadata.gz: dd5f98881a344f630109b020f7614bf3c860a919
4
+ data.tar.gz: e3a1353bf994c3aabee9b779cc35e70c98f70bb3
5
5
  SHA512:
6
- metadata.gz: c8eae2315a8d3a68ce1a873585ab752902287a027a6d45961fef77ad0174d3646d7c7c36107a5879b5b8b36c10afef93428b34098b2ae86dfb6026bb4d644e94
7
- data.tar.gz: 1997c7bb6d11f4b139eb5cde903bc17bffb786bcad236351100e55248d57945fbd19e7cf83f409af357c4c36d1dfb43fbb5adc5a0c2997e66622ed1576bb1921
6
+ metadata.gz: 925e8af64d8eeed22da8b35ffa3b569b80da03956b419bc5055c7bfafc6e52f80dffbdf810f7c538ef50ec103df9431c334295f84811f861d5a78d88a30dc2c5
7
+ data.tar.gz: 82d5cba98f7a7f38899df4bdc20a72597882743ed7fc2a484da9c2d96097dea075cbcfa433b3db20a0ef15075a962d0022851c250a80f337e218d4f2411dc31d
data/.gitignore CHANGED
@@ -7,3 +7,6 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ /rust/target
11
+ /rust/Makefile
12
+ /rust/libsanscript.*
data/.rubocop.yml CHANGED
@@ -1,3 +1,4 @@
1
+ require: rubocop-rspec
1
2
  AllCops:
2
3
  TargetRubyVersion: 2.3
3
4
 
data/README.md CHANGED
@@ -1,9 +1,11 @@
1
1
  # Sanscript.rb
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/sanscript.svg)](https://badge.fury.io/rb/sanscript)
4
+ [![Dependency Status](https://gemnasium.com/badges/github.com/ubcsanskrit/sanscript.rb.svg)](https://gemnasium.com/github.com/ubcsanskrit/sanscript.rb)
3
5
  [![Build Status](https://travis-ci.org/ubcsanskrit/sanscript.rb.svg?branch=master)](https://travis-ci.org/ubcsanskrit/sanscript.rb)
4
- [![Code Climate](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/gpa.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb)
5
6
  [![Test Coverage](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/coverage.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/coverage)
6
- [![Dependency Status](https://gemnasium.com/badges/github.com/ubcsanskrit/sanscript.rb.svg)](https://gemnasium.com/github.com/ubcsanskrit/sanscript.rb)
7
+ [![Code Climate](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/gpa.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb)
8
+ [![Inline docs](http://inch-ci.org/github/ubcsanskrit/sanscript.rb.svg?branch=master)](http://inch-ci.org/github/ubcsanskrit/sanscript.rb)
7
9
 
8
10
  This gem is starting off as a mostly-straightforward port of [learnsanskrit.org's Sanscript.js](https://github.com/sanskrit/sanscript.js), and will go from there. It also incorporates transliteration scheme detection based on [learnsanskrit.org's Detect.js](https://github.com/sanskrit/detect.js).
9
11
 
@@ -33,7 +35,7 @@ Documentation is provided in YARD format and available online at [rubydoc.info](
33
35
 
34
36
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
35
37
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
38
+ To install this gem onto your local machine, run `bundle exec rake install`.
37
39
 
38
40
  ## Contributing
39
41
 
data/Rakefile CHANGED
@@ -5,3 +5,9 @@ require "rspec/core/rake_task"
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
 
7
7
  task default: :spec
8
+
9
# Builds the optional Rust extension that lives in rust/.
#
# Loads rust/extconf.rb and, if a Makefile is present afterwards, runs the
# clean/build/install targets.
task :compile do
  # Use the block form of Dir.chdir so the working directory is restored when
  # the task finishes; without it every task that runs after :compile in the
  # same rake invocation would execute inside rust/.
  Dir.chdir("#{File.dirname(__FILE__)}/rust") do
    require "./extconf.rb"
    sh "make clean && make && make install" if File.exist?("Makefile")
  end
end
@@ -49,8 +49,6 @@ module Sanscript
49
49
  :RE_KOLKATA_ONLY, :RE_ITRANS_ONLY, :RE_SLP1_ONLY, :RE_VELTHUIS_ONLY,
50
50
  :RE_ITRANS_OR_VELTHUIS_ONLY, :RE_HARVARD_KYOTO, :RE_CONTROL_BLOCK
51
51
 
52
- module_function
53
-
54
52
  # @!method detect_scheme(text)
55
53
  # Attempts to detect the encoding scheme of the provided string.
56
54
  #
@@ -70,5 +68,23 @@ module Sanscript
70
68
  extend Ruby2x
71
69
  end
72
70
  # :nocov:
71
+
72
+ # Rust FFI
73
class << self
  # Prefer the compiled Rust detector when both the ffi gem and the native
  # library can be loaded; otherwise alias detect_scheme to the Ruby version.
  begin
    require "ffi"
    extend FFI::Library

    # The shared library only exists if the Rust extension was built.
    # A missing file is reported as a LoadError so the fallback in the
    # rescue clause is selected, rather than passing nil on to ffi_lib.
    rust_library = Dir.glob(File.join(GEM_ROOT, "rust/libsanscript.*")).first
    raise LoadError, "Rust extension library not found" unless rust_library

    ffi_lib rust_library
    attach_function :_rust_detect, :detect, [:string], :int

    # Scheme symbols indexed by the integer code returned from the Rust
    # function; index 0 is nil (no scheme detected).
    RUST_SCHEMES = %i[devanagari bengali gurmukhi gujarati oriya tamil telugu kannada malayalam iast kolkata itrans slp1 velthuis hk].unshift(nil).freeze
    private_constant :RUST_SCHEMES

    # Detects the encoding scheme of the text using the Rust implementation.
    #
    # @param text [String] a string of Sanskrit text
    # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
    def rust_detect_scheme(text)
      RUST_SCHEMES[_rust_detect(text)]
    end
    alias detect_scheme rust_detect_scheme
  rescue LoadError
    alias detect_scheme ruby_detect_scheme
  end
end
73
89
  end
74
90
  end
@@ -9,7 +9,7 @@ module Sanscript
9
9
  #
10
10
  # @param text [String] a string of Sanskrit text
11
11
  # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
12
- def detect_scheme(text)
12
+ def ruby_detect_scheme(text)
13
13
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
14
14
 
15
15
  # Brahmic schemes are all within a specific range of code points.
@@ -8,7 +8,7 @@ module Sanscript
8
8
  #
9
9
  # @param text [String] a string of Sanskrit text
10
10
  # @return [Symbol, nil] the Symbol of the scheme, or nil if no match
11
- def detect_scheme(text)
11
+ def ruby_detect_scheme(text)
12
12
  text = text.to_str.gsub(RE_CONTROL_BLOCK, "")
13
13
 
14
14
  # rubocop:disable Style/CaseEquality
@@ -1,5 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  module Sanscript
3
3
  # The version number
4
- VERSION = "0.5.0"
4
+ VERSION = "0.6.0"
5
+
6
+ GEM_ROOT = Pathname.new(File.realpath(File.join(__dir__, "..", "..")))
7
+ private_constant :GEM_ROOT
5
8
  end
data/rust/Cargo.lock ADDED
@@ -0,0 +1,93 @@
1
+ [root]
2
+ name = "sanscript"
3
+ version = "0.1.0"
4
+ dependencies = [
5
+ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
6
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
7
+ "regex 0.1.73 (registry+https://github.com/rust-lang/crates.io-index)",
8
+ ]
9
+
10
+ [[package]]
11
+ name = "aho-corasick"
12
+ version = "0.5.2"
13
+ source = "registry+https://github.com/rust-lang/crates.io-index"
14
+ dependencies = [
15
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
16
+ ]
17
+
18
+ [[package]]
19
+ name = "kernel32-sys"
20
+ version = "0.2.2"
21
+ source = "registry+https://github.com/rust-lang/crates.io-index"
22
+ dependencies = [
23
+ "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
24
+ "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
25
+ ]
26
+
27
+ [[package]]
28
+ name = "lazy_static"
29
+ version = "0.2.1"
30
+ source = "registry+https://github.com/rust-lang/crates.io-index"
31
+
32
+ [[package]]
33
+ name = "libc"
34
+ version = "0.2.15"
35
+ source = "registry+https://github.com/rust-lang/crates.io-index"
36
+
37
+ [[package]]
38
+ name = "memchr"
39
+ version = "0.1.11"
40
+ source = "registry+https://github.com/rust-lang/crates.io-index"
41
+ dependencies = [
42
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
43
+ ]
44
+
45
+ [[package]]
46
+ name = "regex"
47
+ version = "0.1.73"
48
+ source = "registry+https://github.com/rust-lang/crates.io-index"
49
+ dependencies = [
50
+ "aho-corasick 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)",
51
+ "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
52
+ "regex-syntax 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
53
+ "thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
54
+ "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
55
+ ]
56
+
57
+ [[package]]
58
+ name = "regex-syntax"
59
+ version = "0.3.4"
60
+ source = "registry+https://github.com/rust-lang/crates.io-index"
61
+
62
+ [[package]]
63
+ name = "thread-id"
64
+ version = "2.0.0"
65
+ source = "registry+https://github.com/rust-lang/crates.io-index"
66
+ dependencies = [
67
+ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
68
+ "libc 0.2.15 (registry+https://github.com/rust-lang/crates.io-index)",
69
+ ]
70
+
71
+ [[package]]
72
+ name = "thread_local"
73
+ version = "0.2.6"
74
+ source = "registry+https://github.com/rust-lang/crates.io-index"
75
+ dependencies = [
76
+ "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
77
+ ]
78
+
79
+ [[package]]
80
+ name = "utf8-ranges"
81
+ version = "0.1.3"
82
+ source = "registry+https://github.com/rust-lang/crates.io-index"
83
+
84
+ [[package]]
85
+ name = "winapi"
86
+ version = "0.2.8"
87
+ source = "registry+https://github.com/rust-lang/crates.io-index"
88
+
89
+ [[package]]
90
+ name = "winapi-build"
91
+ version = "0.1.1"
92
+ source = "registry+https://github.com/rust-lang/crates.io-index"
93
+
data/rust/Cargo.toml ADDED
@@ -0,0 +1,12 @@
1
[package]
name = "sanscript"
version = "0.1.0"
authors = ["Tim Bellefleur <nomoon@phoebus.ca>"]

# Requirements match the releases recorded in Cargo.lock. Wildcard ("*")
# requirements would accept any future, possibly incompatible, release
# whenever the lock file is regenerated.
[dependencies]
libc = "0.2"
lazy_static = "0.2"
regex = "0.1"

[lib]
crate-type = ["dylib"]
data/rust/extconf.rb ADDED
@@ -0,0 +1,6 @@
1
# frozen_string_literal: true
require "mkmf"

# Build script for the optional Rust extension.
#
# When both `cargo` and `rustc` can be run, a Makefile that drives Cargo is
# written. Otherwise a Makefile with empty targets is written, so the `make`
# steps that follow this script are no-ops and the library is simply left
# unbuilt.
# NOTE(review): RubyGems is understood to abort installation when an
# extension script leaves no Makefile behind; confirm against the RubyGems
# version(s) you support.
if system("cargo --version") && system("rustc --version")
  create_makefile("sanscript")
  # Replace the mkmf-generated Makefile: Cargo builds the library, which is
  # then moved next to this file before the build directory is removed.
  File.write("Makefile", "all:\n\tcargo build --release\n\nclean:\n\trm -rf target\n\ninstall:\n\tmv target/release/libsanscript.* .\n\trm -rf target\n")
else
  File.write("Makefile", "all:\n\nclean:\n\ninstall:\n")
end
data/rust/src/lib.rs ADDED
@@ -0,0 +1,100 @@
1
+ extern crate libc;
2
+ #[macro_use] extern crate lazy_static;
3
+ extern crate regex;
4
+
5
+ use libc::{c_char, int32_t};
6
+ use std::ffi::CStr;
7
+ use std::str;
8
+ use regex::Regex;
9
+
10
+ #[no_mangle]
11
+ pub extern fn detect(s: *const c_char) -> int32_t {
12
+ let c_str = unsafe {
13
+ assert!(!s.is_null());
14
+
15
+ CStr::from_ptr(s)
16
+ };
17
+
18
+ lazy_static! {
19
+ // # Match escaped control characters
20
+ static ref RE_ESCAPED_CONTROL_CHAR: Regex = Regex::new(r"\\(?:\{#|\##|\#})").unwrap();
21
+
22
+ // # Match ##...## or {#...#} control blocks.
23
+ static ref RE_CONTROL_BLOCK: Regex = Regex::new(r"##.*?##|\{#.*?#\}").unwrap();
24
+
25
+ // Match any character in the block of Brahmic scripts
26
+ // between Devanagari and Malayalam.
27
+ static ref RE_BRAHMIC_RANGE: Regex = Regex::new(r"[\x{0900}-\x{0d7f}]").unwrap();
28
+
29
+ // Match on special Roman characters
30
+ static ref RE_IAST_OR_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻ]").unwrap();
31
+
32
+ // Match on Kolkata-specific Roman characters
33
+ static ref RE_KOLKATA_ONLY: Regex = Regex::new(r"(?i)[ēō]").unwrap();
34
+
35
+ // Match on ITRANS-only
36
+ static ref RE_ITRANS_ONLY: Regex = Regex::new(r"ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a").unwrap();
37
+
38
+ // Match on SLP1-only characters and bigrams
39
+ static ref RE_SLP1_ONLY: Regex = Regex::new(r"[fFxXEOCYwWqQPB]|kz|N[kg]|tT|dD|S[cn]|[aAiIuUeo]R|G[yr]").unwrap();
40
+
41
+ // Match on Velthuis-only characters
42
+ static ref RE_VELTHUIS_ONLY: Regex = Regex::new(r"\.[mhnrlntds]|\x22n|~s").unwrap();
43
+
44
+ // Match on chars shared by ITRANS and Velthuis
45
+ static ref RE_ITRANS_OR_VELTHUIS_ONLY: Regex = Regex::new(r"aa|ii|uu|~n").unwrap();
46
+
47
+ // Match on characters available in Harvard-Kyoto
48
+ static ref RE_HARVARD_KYOTO: Regex = Regex::new(r"[aAiIuUeoRMHkgGcjJTDNtdnpbmyrlvzSsh]").unwrap();
49
+ }
50
+
51
+ let r_replaced_str = &RE_ESCAPED_CONTROL_CHAR.replace_all(c_str.to_str().unwrap(), "");
52
+ let r_str = &RE_CONTROL_BLOCK.replace_all(r_replaced_str, "");
53
+
54
+ // Brahmic schemes are all within a specific range of code points.
55
+ let brahmic_match = RE_BRAHMIC_RANGE.find(r_str);
56
+ if brahmic_match != None {
57
+ let brahmic_match = brahmic_match.unwrap();
58
+ let brahmic_codepoint = r_str.chars().nth(brahmic_match.0).unwrap() as u32;
59
+
60
+ if brahmic_codepoint < 0x0980 {
61
+ return 1;
62
+ } else if brahmic_codepoint < 0x0A00 {
63
+ return 2;
64
+ } else if brahmic_codepoint < 0x0A80 {
65
+ return 3;
66
+ } else if brahmic_codepoint < 0x0B00 {
67
+ return 4;
68
+ } else if brahmic_codepoint < 0x0B80 {
69
+ return 5;
70
+ } else if brahmic_codepoint < 0x0C00 {
71
+ return 6;
72
+ } else if brahmic_codepoint < 0x0C80 {
73
+ return 7;
74
+ } else if brahmic_codepoint < 0x0D00 {
75
+ return 8;
76
+ } else {
77
+ return 9;
78
+ }
79
+ }
80
+
81
+ // Romanizations
82
+ if RE_IAST_OR_KOLKATA_ONLY.is_match(r_str) {
83
+ if RE_KOLKATA_ONLY.is_match(r_str) {
84
+ return 11;
85
+ } else {
86
+ return 10;
87
+ }
88
+ } else if RE_ITRANS_ONLY.is_match(r_str) {
89
+ return 12;
90
+ } else if RE_SLP1_ONLY.is_match(r_str) {
91
+ return 13;
92
+ } else if RE_VELTHUIS_ONLY.is_match(r_str) {
93
+ return 14;
94
+ } else if RE_ITRANS_OR_VELTHUIS_ONLY.is_match(r_str) {
95
+ return 12;
96
+ } else if RE_HARVARD_KYOTO.is_match(r_str) {
97
+ return 15;
98
+ }
99
+ return 0;
100
+ }
data/sanscript.gemspec CHANGED
@@ -18,6 +18,7 @@ Gem::Specification.new do |spec|
18
18
  spec.bindir = "exe"
19
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
20
  spec.require_paths = ["lib"]
21
+ spec.extensions = Dir["rust/extconf.rb"]
21
22
 
22
23
  spec.required_ruby_version = "~> 2.2"
23
24
 
@@ -25,9 +26,12 @@ Gem::Specification.new do |spec|
25
26
  spec.add_development_dependency "rake", "~> 11.2"
26
27
  spec.add_development_dependency "rspec", "~> 3.5"
27
28
  spec.add_development_dependency "codeclimate-test-reporter", "~> 0.6"
29
+ spec.add_development_dependency "rubocop", "~> 0.41"
30
+ spec.add_development_dependency "rubocop-rspec", "~> 1.5"
28
31
  spec.add_development_dependency "pry", "~> 0.10"
29
32
  spec.add_development_dependency "benchmark-ips", "~> 2.6"
30
33
  spec.add_development_dependency "yard", "~> 0.9"
31
34
 
32
- spec.add_runtime_dependency "ragabash", "~> 0.1"
35
+ spec.add_runtime_dependency "ragabash", "~> 0.2"
36
+ spec.add_runtime_dependency "ffi"
33
37
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sanscript
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tim Bellefleur
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-23 00:00:00.000000000 Z
11
+ date: 2016-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,6 +66,34 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.6'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rubocop
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.41'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.41'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rubocop-rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.5'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.5'
69
97
  - !ruby/object:Gem::Dependency
70
98
  name: pry
71
99
  requirement: !ruby/object:Gem::Requirement
@@ -114,19 +142,34 @@ dependencies:
114
142
  requirements:
115
143
  - - "~>"
116
144
  - !ruby/object:Gem::Version
117
- version: '0.1'
145
+ version: '0.2'
118
146
  type: :runtime
119
147
  prerelease: false
120
148
  version_requirements: !ruby/object:Gem::Requirement
121
149
  requirements:
122
150
  - - "~>"
123
151
  - !ruby/object:Gem::Version
124
- version: '0.1'
152
+ version: '0.2'
153
+ - !ruby/object:Gem::Dependency
154
+ name: ffi
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">="
165
+ - !ruby/object:Gem::Version
166
+ version: '0'
125
167
  description:
126
168
  email:
127
169
  - nomoon@phoebus.ca
128
170
  executables: []
129
- extensions: []
171
+ extensions:
172
+ - rust/extconf.rb
130
173
  extra_rdoc_files: []
131
174
  files:
132
175
  - ".codeclimate.yml"
@@ -150,6 +193,10 @@ files:
150
193
  - lib/sanscript/transliterate.rb
151
194
  - lib/sanscript/transliterate/schemes.rb
152
195
  - lib/sanscript/version.rb
196
+ - rust/Cargo.lock
197
+ - rust/Cargo.toml
198
+ - rust/extconf.rb
199
+ - rust/src/lib.rs
153
200
  - sanscript.gemspec
154
201
  homepage: https://github.com/ubcsanskrit/sanscript.rb
155
202
  licenses: