sanscript 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d94fb9c3290ec64af941b806bb8cd78f0b66b442
4
+ data.tar.gz: 3e298a3b363a89081fbf603247f2b49a71144b8a
5
+ SHA512:
6
+ metadata.gz: 8ed2a31fa2f140f4e0085638996cbf31693735d07348fb367505fa104a06a1d22834f18ac3cbc0696079ff0503729b0b64192c006ccce1945ad3de5737d8aef3
7
+ data.tar.gz: fe535ef6247b9d91ab23643566a2d9e86d7c144cdccf66f3ce6f8b18ba49830813c8d9c20e27c10fcb536c480cee25d504119bf2ba7f04d2676b49f59d4135d4
@@ -0,0 +1,9 @@
1
+ ---
2
+ engines:
3
+ rubocop:
4
+ enabled: true
5
+ fixme:
6
+ enabled: true
7
+ ratings:
8
+ paths:
9
+ - "**.rb"
@@ -0,0 +1,9 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
@@ -0,0 +1,97 @@
1
+ AllCops:
2
+ TargetRubyVersion: 2.3
3
+
4
+ # Metrics
5
+ Metrics/AbcSize:
6
+ Description: >-
7
+ A calculated magnitude based on number of assignments,
8
+ branches, and conditions.
9
+ Reference: 'http://c2.com/cgi/wiki?AbcMetric'
10
+ Enabled: false
11
+ Max: 20
12
+
13
+ Metrics/BlockNesting:
14
+ Description: 'Avoid excessive block nesting'
15
+ StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#three-is-the-number-thou-shalt-count'
16
+ Enabled: true
17
+ Max: 4
18
+
19
+ Metrics/ClassLength:
20
+ Description: 'Avoid classes longer than 250 lines of code.'
21
+ Enabled: true
22
+ Max: 250
23
+
24
+ Metrics/CyclomaticComplexity:
25
+ Description: >-
26
+ A complexity metric that is strongly correlated to the number
27
+ of test cases needed to validate a method.
28
+ Enabled: true
29
+ Max: 10
30
+
31
+ Metrics/LineLength:
32
+ Description: 'Limit lines to 80 characters.'
33
+ StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#80-character-limits'
34
+ Enabled: false
35
+
36
+ Metrics/MethodLength:
37
+ Description: 'Avoid methods longer than 40 lines of code.'
38
+ Enabled: true
39
+ Max: 40
40
+
41
+ Metrics/ModuleLength:
42
+ Description: 'Avoid modules longer than 250 lines of code.'
43
+ Enabled: true
44
+ Max: 250
45
+
46
+ Metrics/ParameterLists:
47
+ Description: 'Avoid parameter lists longer than three or four parameters.'
48
+ StyleGuide: 'https://github.com/bbatsov/ruby-style-guide#too-many-params'
49
+ Enabled: true
50
+
51
+ Metrics/PerceivedComplexity:
52
+ Description: >-
53
+ A complexity metric geared towards measuring complexity for a
54
+ human reader.
55
+ Enabled: false
56
+
57
+ # Style
58
+ Style/AsciiComments:
59
+ Enabled: false
60
+
61
+ Style/CollectionMethods:
62
+ Enabled: true
63
+
64
+ Style/Documentation:
65
+ Enabled: false
66
+
67
+ Style/EmptyLiteral:
68
+ Enabled: false
69
+
70
+ Style/FormatString:
71
+ EnforcedStyle: percent
72
+
73
+ Style/HashSyntax:
74
+ EnforcedStyle: ruby19_no_mixed_keys
75
+
76
+ Style/MethodCalledOnDoEndBlock:
77
+ Enabled: true
78
+
79
+ Style/PercentLiteralDelimiters:
80
+ PreferredDelimiters:
81
+ '%i': '[]'
82
+ '%I': '[]'
83
+ '%w': '[]'
84
+ '%W': '[]'
85
+
86
+ Style/StringLiterals:
87
+ EnforcedStyle: double_quotes
88
+
89
+ Style/SymbolArray:
90
+ Enabled: true
91
+
92
+ Style/TrailingCommaInLiteral:
93
+ EnforcedStyleForMultiline: comma
94
+
95
+ Style/TrivialAccessors:
96
+ ExactNameMatch: true
97
+ AllowPredicates: true
@@ -0,0 +1,9 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.3.1
5
+ before_install: gem install bundler -v 1.12.5
6
+ script: bundle exec rake spec SPEC_OPTS="--format p"
7
+ addons:
8
+ code_climate:
9
+ repo_token: b4319eb676f021fb9473aa342324abd74f58d146e174f3cdf57f103bab052f25
@@ -0,0 +1,49 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, and in the interest of
4
+ fostering an open and welcoming community, we pledge to respect all people who
5
+ contribute through reporting issues, posting feature requests, updating
6
+ documentation, submitting pull requests or patches, and other activities.
7
+
8
+ We are committed to making participation in this project a harassment-free
9
+ experience for everyone, regardless of level of experience, gender, gender
10
+ identity and expression, sexual orientation, disability, personal appearance,
11
+ body size, race, ethnicity, age, religion, or nationality.
12
+
13
+ Examples of unacceptable behavior by participants include:
14
+
15
+ * The use of sexualized language or imagery
16
+ * Personal attacks
17
+ * Trolling or insulting/derogatory comments
18
+ * Public or private harassment
19
+ * Publishing others' private information, such as physical or electronic
20
+ addresses, without explicit permission
21
+ * Other unethical or unprofessional conduct
22
+
23
+ Project maintainers have the right and responsibility to remove, edit, or
24
+ reject comments, commits, code, wiki edits, issues, and other contributions
25
+ that are not aligned to this Code of Conduct, or to ban temporarily or
26
+ permanently any contributor for other behaviors that they deem inappropriate,
27
+ threatening, offensive, or harmful.
28
+
29
+ By adopting this Code of Conduct, project maintainers commit themselves to
30
+ fairly and consistently applying these principles to every aspect of managing
31
+ this project. Project maintainers who do not follow or enforce the Code of
32
+ Conduct may be permanently removed from the project team.
33
+
34
+ This code of conduct applies both within project spaces and in public spaces
35
+ when an individual is representing the project or its community.
36
+
37
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
38
+ reported by contacting a project maintainer at nomoon@phoebus.ca. All
39
+ complaints will be reviewed and investigated and will result in a response that
40
+ is deemed necessary and appropriate to the circumstances. Maintainers are
41
+ obligated to maintain confidentiality with regard to the reporter of an
42
+ incident.
43
+
44
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
45
+ version 1.3.0, available at
46
+ [http://contributor-covenant.org/version/1/3/0/][version]
47
+
48
+ [homepage]: http://contributor-covenant.org
49
+ [version]: http://contributor-covenant.org/version/1/3/0/
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+ source "https://rubygems.org"
3
+
4
+ # Specify your gem's dependencies in sanscript.gemspec
5
+ gemspec
@@ -0,0 +1,22 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 learnsanskrit.org (original Javascript)
4
+ Copyright (c) 2016 ported by Tim Bellefleur (Ruby port)
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in
14
+ all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22
+ THE SOFTWARE.
@@ -0,0 +1,43 @@
1
+ # Sanscript.rb
2
+
3
+ [![Build Status](https://travis-ci.org/ubcsanskrit/sanscript.rb.svg?branch=master)](https://travis-ci.org/ubcsanskrit/sanscript.rb)
4
+ [![Code Climate](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/gpa.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb)
5
+ [![Test Coverage](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/badges/coverage.svg)](https://codeclimate.com/github/ubcsanskrit/sanscript.rb/coverage)
6
+ [![Dependency Status](https://gemnasium.com/badges/github.com/ubcsanskrit/sanscript.rb.svg)](https://gemnasium.com/github.com/ubcsanskrit/sanscript.rb)
7
+
8
+ This gem is starting off as a mostly-straightforward port of [learnsanskrit.org's Sanscript.js](https://github.com/sanskrit/sanscript.js), and will go from there. It also incorporates transliteration scheme detection based on [learnsanskrit.org's Detect.js](https://github.com/sanskrit/detect.js).
9
+
10
+ ## Installation
11
+
12
+ Add this line to your application's Gemfile:
13
+
14
+ ```ruby
15
+ gem 'sanscript'
16
+ ```
17
+
18
+ And then execute:
19
+
20
+ $ bundle
21
+
22
+ Or install it yourself as:
23
+
24
+ $ gem install sanscript
25
+
26
+ ## Usage
27
+
28
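+ You can access detection through `Sanscript.detect(text)` and transliteration through `Sanscript.transliterate(text, from, to)`. If you omit the source scheme and call `Sanscript.transliterate(text, to)`, the source scheme is detected automatically. The code should be fairly straightforward and is partially documented.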
29
+
30
+ ## Development
31
+
32
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
33
+
34
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
35
+
36
+ ## Contributing
37
+
38
+ Bug reports and pull requests are welcome on GitHub at https://github.com/ubcsanskrit/sanscript.rb. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
39
+
40
+
41
+ ## License
42
+
43
+ The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+ require "bundler/gem_tasks"
3
+ require "rspec/core/rake_task"
4
+
5
+ RSpec::Core::RakeTask.new(:spec)
6
+
7
+ task default: :spec
@@ -0,0 +1,12 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "sanscript"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ require "pry"
12
+ Pry.start
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ require "sanscript/version"
3
+ require "sanscript/detect"
4
+ require "sanscript/transliterate"
5
+ require "sanscript/benchmark"
6
+
7
module Sanscript
  module_function

  # Detects the script / transliteration scheme of the given text.
  #
  # Thin proxy for Detect.detect_script; the result is returned unchanged.
  #
  # @param text [String] the text to inspect
  # @return the value returned by Detect.detect_script(text)
  def detect(text)
    Detect.detect_script(text)
  end

  # Transliterates text from one scheme to another.
  #
  # The method accepts two call shapes:
  #
  #   transliterate(text, to)        # source scheme is auto-detected
  #   transliterate(text, from, to)  # source and target given explicitly
  #
  # Any trailing keyword arguments are collected into a Hash and handed to
  # Transliterate.transliterate as its final positional argument.
  #
  # @param text [String] the text to transliterate
  # @param first the target scheme when +second+ is omitted,
  #   otherwise the source scheme
  # @param second the target scheme, or nil to auto-detect the source
  # @param options [Hash] options forwarded to Transliterate.transliterate
  # @return the value returned by Transliterate.transliterate
  def transliterate(text, first, second = nil, **options)
    # With only one scheme supplied it names the target, and the source is
    # detected from the text itself. Detection is skipped when both are given.
    from, to = second.nil? ? [Detect.detect_script(text), first] : [first, second]
    Transliterate.transliterate(text, from, to, options)
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sanscript/refinements"
4
+ require "benchmark"
5
+
6
module Sanscript
  # NOTE(review): nothing in this module visibly depends on a refinement;
  # confirm whether activating Refinements here is still required.
  using Refinements

  # Ad-hoc benchmarks for scheme detection and transliteration, built on the
  # standard library's ::Benchmark.bmbm (rehearsal pass + measured pass).
  module Benchmark
    module_function

    # Benchmarks Sanscript.detect on one IAST and one Devanagari sample.
    #
    # Each report performs 100_000 detections and raises (bare +raise+) if a
    # detection does not return the expected scheme.
    #
    # @return the value returned by ::Benchmark.bmbm
    def detection!
      iterations = 100_000
      iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
      deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"

      # [report label, sample text, scheme the detector must return]
      samples = [
        ["Detect IAST", iast_string, :iast],
        ["Detect Devanagari", deva_string, :devanagari],
      ]

      ::Benchmark.bmbm(18) do |bench|
        samples.each do |label, sample, expected|
          bench.report(label) do
            iterations.times { raise unless Sanscript.detect(sample) == expected }
          end
        end
      end
    end

    # Benchmarks Sanscript.transliterate for several scheme pairs.
    #
    # Labels containing "**>" use the two-argument form (source scheme
    # auto-detected); labels containing "==>" pass the source explicitly.
    # Each report performs 5_000 transliterations.
    #
    # @return the value returned by ::Benchmark.bmbm
    def transliteration!
      iterations = 5_000
      iast_string = "nānāśāstrasubhāṣitāmṛtarasaiḥ śrotrotsavaṃ kurvatāṃ yeṣāṃ yānti dināni paṇḍitajanavyāyāmakhinnātmanām teṣāṃ janma ca jīvitaṃ ca sukṛtaṃ tair eva bhūr bhūṣitā śeṣaih kiṃ paśuvad vivekarahitair bhūbhārabhūtair naraḥ"
      deva_string = "नानाशास्त्रसुभाषितामृतरसैः श्रोत्रोत्सवं कुर्वतां येषां यान्ति दिनानि पण्डितजनव्यायामखिन्नात्मनाम् तेषां जन्म च जीवितं च सुकृतं तैर् एव भूर् भूषिता शेषैह् किं पशुवद् विवेकरहितैर् भूभारभूतैर् नरः"

      # [report label, arguments passed to Sanscript.transliterate]
      runs = [
        ["IAST**>Devanagari", [iast_string, :devanagari]],
        ["IAST==>Devanagari", [iast_string, :iast, :devanagari]],
        ["IAST**>SLP1", [iast_string, :slp1]],
        ["IAST==>SLP1", [iast_string, :iast, :slp1]],
        ["Devanagari**>SLP1", [deva_string, :slp1]],
        ["Devanagari**>IAST", [deva_string, :iast]],
      ]

      ::Benchmark.bmbm(18) do |bench|
        runs.each do |label, arguments|
          bench.report(label) do
            iterations.times { Sanscript.transliterate(*arguments) }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+ #
3
+ # Developed from code available @ https://github.com/sanskrit/detect.js
4
+ #
5
module Sanscript
  # Heuristic detection of the script or romanization scheme of a text.
  #
  # Developed from code available @ https://github.com/sanskrit/detect.js
  module Detect
    # Matches any character in the code point range U+0900..U+0D7F, i.e. the
    # block of Brahmic scripts between Devanagari and Malayalam.
    RE_BRAHMIC_RANGE = /[\u0900-\u0d7f]/

    # One matcher per individual Brahmic script, keyed by the Symbol that
    # detect_script returns for it. Checked in insertion order.
    RE_BRAHMIC_SCRIPTS = {
      devanagari: /\p{Devanagari}/,
      bengali: /\p{Bengali}/,
      gurmukhi: /\p{Gurmukhi}/,
      gujarati: /\p{Gujarati}/,
      oriya: /\p{Oriya}/,
      tamil: /\p{Tamil}/,
      telugu: /\p{Telugu}/,
      kannada: /\p{Kannada}/,
      malayalam: /\p{Malayalam}/,
    }.freeze

    # Matches the special (diacritic) Roman characters of IAST / Kolkata.
    RE_IAST_OR_KOLKATA_ONLY = /[āīūṛṝḷḹēōṃḥṅñṭḍṇśṣḻ]/i

    # Matches the Kolkata-specific Roman characters.
    RE_KOLKATA_ONLY = /[ēō]/i

    # Matches sequences that only occur in ITRANS.
    RE_ITRANS_ONLY = /ee|oo|\^[iI]|RR[iI]|L[iI]|~N|N\^|Ch|chh|JN|sh|Sh|\.a/

    # Matches SLP1-only characters and bigrams. The final alternative is a
    # "G" at the start of a line or directly after a non-word character; it
    # must be written \W (a doubled backslash would match a literal "\W").
    RE_SLP1_ONLY = /[fFxXEOCYwWqQPB]|kz|Nk|Ng|tT|dD|Sc|Sn|[aAiIuUfFxXeEoO]R|G[yr]|(\W|^)G/

    # Matches sequences that only occur in Velthuis.
    RE_VELTHUIS_ONLY = /\.[mhnrltds]|"n|~s/

    # Matches sequences shared by ITRANS and Velthuis.
    RE_ITRANS_OR_VELTHUIS_ONLY = /aa|ii|uu|~n/

    # Matches any character used by Harvard-Kyoto; this is the last-resort
    # check before a text is reported as :unknown.
    RE_HARVARD_KYOTO = /[aAiIuUeoRMHkgGcjJTDNtdnpbmyrlvzSsh]/

    private_constant :RE_BRAHMIC_RANGE, :RE_BRAHMIC_SCRIPTS, :RE_IAST_OR_KOLKATA_ONLY,
                     :RE_KOLKATA_ONLY, :RE_ITRANS_ONLY, :RE_SLP1_ONLY, :RE_VELTHUIS_ONLY,
                     :RE_ITRANS_OR_VELTHUIS_ONLY, :RE_HARVARD_KYOTO

    module_function

    # Guesses the script or romanization scheme used by the given text.
    #
    # Brahmic scripts are tried first; the romanization checks then run from
    # the most distinctive scheme to the least, and the first match wins.
    #
    # @param text [String] the text to inspect
    # @return [Symbol] a key of RE_BRAHMIC_SCRIPTS (e.g. :devanagari), or one
    #   of :kolkata, :iast, :itrans, :slp1, :velthuis, :hk, :unknown
    def detect_script(text)
      # Brahmic schemes are all within a specific range of code points.
      if text =~ RE_BRAHMIC_RANGE
        script, = RE_BRAHMIC_SCRIPTS.find { |_name, regex| text =~ regex }
        return script if script
      end

      # Romanizations
      if text =~ RE_IAST_OR_KOLKATA_ONLY
        text =~ RE_KOLKATA_ONLY ? :kolkata : :iast
      elsif text =~ RE_ITRANS_ONLY
        :itrans
      elsif text =~ RE_SLP1_ONLY
        :slp1
      elsif text =~ RE_VELTHUIS_ONLY
        :velthuis
      elsif text =~ RE_ITRANS_OR_VELTHUIS_ONLY
        :itrans
      elsif text =~ RE_HARVARD_KYOTO
        :hk
      else
        :unknown
      end
    end
  end
end