medieval_latina 3.1.1 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f38ebdf6480281f444c3142a6c0dd8ef3b805c34ab45710dd6af46b904f5bbc5
4
- data.tar.gz: ec33854ab71c933af1da4fe44055ac94d0cf517894a60ebd9addbf42058ee7d5
3
+ metadata.gz: 47f39ae4c5ce8d0b6b624beae267c9a8e00b2ac44e6c09686e34e825d8671dcd
4
+ data.tar.gz: a8f8ca9f9357a0453429104a1d8e8423c63dbd3ab4360277993c29c46cb7573f
5
5
  SHA512:
6
- metadata.gz: 951bb7a9259f812296866752be7ff0b1a4526bf0a7521b72c6465de4e2a922887922f443070b8b990e4a57a8a48ee8e76a0c67b78720598890ab71173379e924
7
- data.tar.gz: 363b7fe7a6331397c0e11ade30f4c4ef6db456d3aac200919e308b318f08941df25c552fa095f4be6b3937c19a9ffd48e53fe1b172b83999a4b4e224506a837f
6
+ metadata.gz: d1e79d2981bdc926e0640de05a3d870ea907075f6c3c822be135c89f2060b1abaa386fe50fb4e61325e7d799ab7d216b33329ec58b35b6e9520bc4c1b7367300
7
+ data.tar.gz: 64dd8015e355efc41bdc754f3720c204dda502d64c0665dfed7830063f8ececb09f5eefefcfd9e520ee105676695f525a6fc1752e4be61e41a2e16f0c1b2eac9
@@ -0,0 +1,57 @@
1
+ name: Claude Code Review
2
+
3
+ on:
4
+ pull_request:
5
+ types: [opened, synchronize]
6
+ # Optional: Only run on specific file changes
7
+ # paths:
8
+ # - "src/**/*.ts"
9
+ # - "src/**/*.tsx"
10
+ # - "src/**/*.js"
11
+ # - "src/**/*.jsx"
12
+
13
+ jobs:
14
+ claude-review:
15
+ # Optional: Filter by PR author
16
+ # if: |
17
+ # github.event.pull_request.user.login == 'external-contributor' ||
18
+ # github.event.pull_request.user.login == 'new-developer' ||
19
+ # github.event.pull_request.author_association == 'FIRST_TIME_CONTRIBUTOR'
20
+
21
+ runs-on: ubuntu-latest
22
+ permissions:
23
+ contents: read
24
+ pull-requests: read
25
+ issues: read
26
+ id-token: write
27
+
28
+ steps:
29
+ - name: Checkout repository
30
+ uses: actions/checkout@v4
31
+ with:
32
+ fetch-depth: 1
33
+
34
+ - name: Run Claude Code Review
35
+ id: claude-review
36
+ uses: anthropics/claude-code-action@v1
37
+ with:
38
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
39
+ prompt: |
40
+ REPO: ${{ github.repository }}
41
+ PR NUMBER: ${{ github.event.pull_request.number }}
42
+
43
+ Please review this pull request and provide feedback on:
44
+ - Code quality and best practices
45
+ - Potential bugs or issues
46
+ - Performance considerations
47
+ - Security concerns
48
+ - Test coverage
49
+
50
+ Use the repository's CLAUDE.md for guidance on style and conventions. Be constructive and helpful in your feedback.
51
+
52
+ Use `gh pr comment` with your Bash tool to leave your review as a comment on the PR.
53
+
54
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
55
+ # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options
56
+ claude_args: '--allowed-tools "Bash(gh issue view:*),Bash(gh search:*),Bash(gh issue list:*),Bash(gh pr comment:*),Bash(gh pr diff:*),Bash(gh pr view:*),Bash(gh pr list:*)"'
57
+
@@ -0,0 +1,50 @@
1
+ name: Claude Code
2
+
3
+ on:
4
+ issue_comment:
5
+ types: [created]
6
+ pull_request_review_comment:
7
+ types: [created]
8
+ issues:
9
+ types: [opened, assigned]
10
+ pull_request_review:
11
+ types: [submitted]
12
+
13
+ jobs:
14
+ claude:
15
+ if: |
16
+ (github.event_name == 'issue_comment' && contains(github.event.comment.body, '@claude')) ||
17
+ (github.event_name == 'pull_request_review_comment' && contains(github.event.comment.body, '@claude')) ||
18
+ (github.event_name == 'pull_request_review' && contains(github.event.review.body, '@claude')) ||
19
+ (github.event_name == 'issues' && (contains(github.event.issue.body, '@claude') || contains(github.event.issue.title, '@claude')))
20
+ runs-on: ubuntu-latest
21
+ permissions:
22
+ contents: read
23
+ pull-requests: read
24
+ issues: read
25
+ id-token: write
26
+ actions: read # Required for Claude to read CI results on PRs
27
+ steps:
28
+ - name: Checkout repository
29
+ uses: actions/checkout@v4
30
+ with:
31
+ fetch-depth: 1
32
+
33
+ - name: Run Claude Code
34
+ id: claude
35
+ uses: anthropics/claude-code-action@v1
36
+ with:
37
+ claude_code_oauth_token: ${{ secrets.CLAUDE_CODE_OAUTH_TOKEN }}
38
+
39
+ # This is an optional setting that allows Claude to read CI results on PRs
40
+ additional_permissions: |
41
+ actions: read
42
+
43
+ # Optional: Give a custom prompt to Claude. If this is not specified, Claude will perform the instructions specified in the comment that tagged it.
44
+ # prompt: 'Update the pull request description to include a summary of changes.'
45
+
46
+ # Optional: Add claude_args to customize behavior and configuration
47
+ # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md
48
+ # or https://docs.claude.com/en/docs/claude-code/sdk#command-line for available options
49
+ # claude_args: '--model claude-opus-4-1-20250805 --allowed-tools Bash(gh pr:*)'
50
+
@@ -22,6 +22,7 @@ jobs:
22
22
  - '3.2'
23
23
  - '3.3'
24
24
  - '3.4'
25
+ - '4.0'
25
26
  allow_failures:
26
27
  - false
27
28
  env:
data/.tool-versions CHANGED
@@ -1,2 +1,2 @@
1
1
  nodejs 24.2.0
2
- ruby 3.4.1
2
+ ruby 4.0.5
data/AGENTS.md ADDED
@@ -0,0 +1,60 @@
1
+ # AGENTS.md
2
+
3
+ This file provides guidance to Codex when working with code in this repository.
4
+
5
+ ## Development Commands
6
+
7
+ - **Setup**: `bin/setup` - Install dependencies with bundle install
8
+ - **Tests**: `rake spec` or `bundle exec rspec` - Run the full test suite
9
+ - **Linting**: `bin/lint` - Run StandardRB linter and jsonlint on dictionary.json
10
+ - **Build lexicons**: `bin/build` - Regenerate PLS lexicon files from dictionary.json
11
+ - **Console**: `bin/console` - Interactive prompt for experimentation
12
+ - **Install gem locally**: `bundle exec rake install`
13
+ - **Release**: `bundle exec rake release` (after updating version.rb)
14
+
15
+ ## Architecture Overview
16
+
17
+ This is a Ruby gem that converts medieval Latin text to phonetic English for text-to-speech engines. The architecture consists of:
18
+
19
+ ### Core Components
20
+
21
+ - **MedievalLatina class** (`lib/medieval_latina.rb`): Main interface with class methods for text conversion and linguistic analysis
22
+ - `MedievalLatina[text]` - Primary conversion method
23
+ - Part-of-speech helpers: `verb?`, `noun?`, `adjective?`, `adverb?`
24
+ - `pronunciations_for(words)` - Extract IPA pronunciations for lexicon building
25
+
26
+ - **Dictionary system** (`data/dictionary.json`): Large JSON file containing Latin words with metadata including:
27
+ - IPA pronunciations
28
+ - Part of speech classifications
29
+ - Custom pronunciation overrides
30
+
31
+ - **Lexicon generation** (`lib/medieval_latina/lexicon_builder.rb`, `lib/medieval_latina/lexicon.rb`): Creates PLS (Pronunciation Lexicon Specification) files for AWS Polly and other TTS engines
32
+
33
+ ### Phonetic Conversion Logic
34
+
35
+ The main conversion algorithm handles:
36
+ - Vowel teams: ae→ay, oe→ay, au→ou
37
+ - Consonant transformations: c→ch/k (soft/hard), g→j/g, j→y, t→ts/t, x→ks
38
+ - Consonant teams: gn→n-y, qu→kw
39
+ - Text preprocessing with I18n transliteration
40
+
41
+ ### Data Flow
42
+
43
+ 1. Text input → word tokenization → dictionary lookup
44
+ 2. If word has custom pronunciation → use it
45
+ 3. Otherwise → apply phonetic transformation rules
46
+ 4. Rejoin with proper punctuation spacing
47
+
48
+ ## Key Files
49
+
50
+ - `lib/medieval_latina.rb` - Main conversion logic and API
51
+ - `data/dictionary.json` - Latin word database (400KB+)
52
+ - `bin/build` - Splits dictionary into multiple PLS files in lexicons/ directory
53
+ - `.standard.yml` - StandardRB configuration (Ruby 3.2, parallel linting)
54
+ - `medieval_latina.gemspec` - Gem specification (requires Ruby >= 3.2.0)
55
+
56
+ ## Testing
57
+
58
+ - RSpec test suite in `spec/`
59
+ - Configuration in `.rspec` with documentation format
60
+ - Run specific tests: `bundle exec rspec spec/specific_spec.rb`
data/Gemfile CHANGED
@@ -2,3 +2,8 @@ source "https://rubygems.org"
2
2
 
3
3
  # Specify your gem's dependencies in medieval_latina.gemspec
4
4
  gemspec
5
+
6
+ # `logger` is a bundled gem, not a default gem, as of Ruby 4.0 (Ruby 3.4 only
7
+ # warns about the move), so Bundler no longer loads it implicitly. jsonlint requires it
8
+ # without declaring the dependency, so declare it here for `bundle exec jsonlint`.
9
+ gem "logger"
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- medieval_latina (3.1.1)
4
+ medieval_latina (3.2.0)
5
5
  i18n
6
6
 
7
7
  GEM
@@ -19,8 +19,9 @@ GEM
19
19
  optimist (~> 3)
20
20
  language_server-protocol (3.17.0.3)
21
21
  lint_roller (1.1.0)
22
+ logger (1.7.0)
22
23
  mini_portile2 (2.8.9)
23
- nokogiri (1.18.9)
24
+ nokogiri (1.19.3)
24
25
  mini_portile2 (~> 2.8.2)
25
26
  racc (~> 1.4)
26
27
  oj (3.16.5)
@@ -88,6 +89,7 @@ PLATFORMS
88
89
 
89
90
  DEPENDENCIES
90
91
  jsonlint
92
+ logger
91
93
  medieval_latina!
92
94
  nokogiri
93
95
  rake (~> 12.0)
data/README.md CHANGED
@@ -3,6 +3,22 @@
3
3
  There are good text-to-speech engines for English and classical Latin, but none for medieval Latin.
4
4
  `MedievalLatina` converts Latin text to a kind of phonetic spelling that can be read by English language text-to-speech engines.
5
5
 
6
+ ## Hear it
7
+
8
+ A line of the Lord's Prayer — *Pater noster qui es in caelis* — spoken by Amazon Polly using this gem's IPA pronunciation lexicon, so you hear MedievalLatina's pronunciation rather than raw Latin:
9
+
10
+ ▶️ **[Play the sample](https://github.com/jaysonvirissimo/medieval_latina/raw/master/audio/pater-noster.mp3)** (`audio/pater-noster.mp3`)
11
+
12
+ <!-- Inline player: drag-and-drop audio/pater-noster.mp3 into a GitHub PR/comment composer to
13
+ mint a https://github.com/user-attachments/assets/<id> URL, then paste that URL on its own
14
+ line directly below to render an inline audio player in the rendered README. -->
15
+
16
+ - **Text:** *Pater noster qui es in caelis* — "Our Father, who art in heaven", the traditional Latin Lord's Prayer (public domain).
17
+ - **Gem output:** `MedievalLatina["Pater noster qui es in caelis"]` → `"pah-tare nohstayr kwee es een chaylees"`.
18
+ - **Voice:** Amazon Polly **Bianca** (Italian, `it-IT`, neural engine), driven by MedievalLatina's IPA via a PLS lexicon.
19
+ - **Reproduce:** `ruby -Ilib bin/sample_audio Bianca` (requires AWS credentials in the environment and the `aws-sdk-polly` gem); the script writes its output to `tmp/sample-Bianca.mp3`.
20
+ - **Provenance:** audio generated with Amazon Polly on 2026-06-07; Polly output may be used and redistributed under the [AWS Service Terms](https://aws.amazon.com/service-terms/).
21
+
6
22
  ## Installation
7
23
 
8
24
  Add this line to your application's Gemfile:
@@ -40,7 +56,8 @@ responsiveVoice.speak(sentence, "UK English Female");
40
56
  polly = Aws::Polly::Client.new
41
57
  s3 = Aws::S3::Client.new
42
58
 
43
- sentence = "PATER NOSTER qui es in caelis"
59
+ # Lowercase so the text matches the lexicon's (case-sensitive) lowercase graphemes.
60
+ sentence = "pater noster qui es in caelis"
44
61
 
45
62
  words = sentence.split(" ")
46
63
  pronunciations = MedievalLatina.pronunciations_for(words)
data/bin/sample_audio ADDED
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Generates the README's spoken sample with Amazon Polly, using MedievalLatina's
5
+ # IPA pronunciations via a PLS lexicon so Polly pronounces the Latin our way.
6
+ #
7
+ # Usage:
8
+ # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY must be in the environment.
9
+ # aws-sdk-polly must be available, e.g. run through a bundle that provides it:
10
+ # BUNDLE_GEMFILE=/path/to/aws/Gemfile bundle exec ruby -Ilib bin/sample_audio Joanna
11
+ # Output: tmp/sample-<voice>.mp3
12
+ #
13
+ # Credentials are read from the AWS SDK default chain only; they are never echoed,
14
+ # logged, or written to disk.
15
+
16
+ require "medieval_latina"
17
+ require "aws-sdk-polly"
18
+ require "fileutils"
19
+
20
+ # us-east-1 is where the voices/engines below are available; override via env if needed.
21
+ REGION = ENV["AWS_REGION"] || ENV["AWS_DEFAULT_REGION"] || "us-east-1"
22
+ # Polly matches lexicon graphemes case-sensitively and our graphemes are lowercase,
23
+ # so synthesize from lowercased text (the README's prose and gem-output example still show it capitalized).
24
+ SENTENCE = "pater noster qui es in caelis"
25
+
26
+ # Polly applies a lexicon only when its xml:lang matches the voice's language, and
27
+ # lexicons work on the standard/neural engines (not generative). Each voice below is
28
+ # paired with the matching lexicon language and a lexicon-capable engine.
29
+ VOICES = {
30
+ "Joanna" => {lang: "en-US", engine: "neural"},
31
+ "Matthew" => {lang: "en-US", engine: "neural"},
32
+ "Danielle" => {lang: "en-US", engine: "neural"},
33
+ "Stephen" => {lang: "en-US", engine: "neural"},
34
+ "Bianca" => {lang: "it-IT", engine: "neural"},
35
+ "Carla" => {lang: "it-IT", engine: "standard"},
36
+ "Giorgio" => {lang: "it-IT", engine: "standard"}
37
+ }.freeze
38
+
39
+ LEXICON_NAMES = {"en-US" => "MedievalLatinaEnUs", "it-IT" => "MedievalLatinaItIt"}.freeze
40
+
41
+ abort "Usage: #{$PROGRAM_NAME} <voice>\nChoices: #{VOICES.keys.join(", ")}" if ARGV.empty?
42
+
43
+ voice = ARGV[0]
44
+ config = VOICES[voice]
45
+ abort "Unknown voice #{voice.inspect}. Choose one of: #{VOICES.keys.join(", ")}" unless config
46
+
47
+ # Build the {word => IPA} map from the gem's dictionary for every word in the sentence.
48
+ pronunciations = MedievalLatina.pronunciations_for(SENTENCE.split)
49
+
50
+ # Build a lexicon whose xml:lang matches the voice's language (Polly applies a lexicon
51
+ # only when the languages match).
52
+ lang = config[:lang]
53
+ lexicon_name = LEXICON_NAMES[lang]
54
+ lexicon = MedievalLatina::LexiconBuilder.new(pronunciations, lang: lang).call.to_s
55
+
56
+ polly = Aws::Polly::Client.new(region: REGION)
57
+ polly.put_lexicon(name: lexicon_name, content: lexicon) # idempotent; overwrites
58
+
59
+ FileUtils.mkdir_p("tmp")
60
+ output_path = "tmp/sample-#{voice}.mp3"
61
+
62
+ # Always remove the lexicon from the AWS account, even if synthesis/write raises.
63
+ begin
64
+ response = polly.synthesize_speech(
65
+ text: SENTENCE,
66
+ lexicon_names: [lexicon_name],
67
+ voice_id: voice,
68
+ engine: config[:engine],
69
+ output_format: "mp3"
70
+ )
71
+ File.binwrite(output_path, response.audio_stream.read)
72
+ ensure
73
+ polly.delete_lexicon(name: lexicon_name)
74
+ end
75
+
76
+ puts "#{voice} (#{lang}, #{config[:engine]}) -> #{output_path} (#{File.size(output_path)} bytes)"