apacify 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/CLAUDE.md +36 -0
- data/lib/apacify/titleizer.rb +2 -8
- data/lib/apacify/token.rb +43 -9
- data/lib/apacify/tokenizer.rb +12 -9
- data/lib/apacify/version.rb +1 -1
- data/lib/apacify.rb +16 -4
- metadata +3 -4
- data/config/minor.yml +0 -28
- data/lib/apacify/error.rb +0 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7d47b70beff46216daa1ff9aef7c7fe770368089d28b79bc0dc094ab4091252f
|
|
4
|
+
data.tar.gz: 862dae468db6d27bbd09e05056c4861950d8ce5629f27cc5c41cdc6eed336559
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f3a52d9a875a59bd0d642aa9f57703a017ddd6f2fe39c30942bd674c24af783ecba4c84f7f09f74751b6b451cf6078c518f71b71c8fac97bdace5931be7afb40
|
|
7
|
+
data.tar.gz: 9244be4b802c94724b124cfc32c08bbc3ad7ac3f679db0b855a13174376fa3041587d60143985c3772996e26a2b1c2e8f58ebbd32d18218a8cef856229279fef
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
## [0.4.0] - 2026-03-01
|
|
2
|
+
|
|
3
|
+
- Lowercase second element after hyphenated prefixes per APA 7th ed (e.g., Mid-century, Pre-war)
|
|
4
|
+
- Preserve proper nouns after prefixes when input is already capitalized (e.g., Pre-Christian)
|
|
5
|
+
|
|
6
|
+
## [0.3.0] - 2025-08-30
|
|
7
|
+
|
|
8
|
+
- Support Roman numerals
|
|
9
|
+
|
|
1
10
|
## [0.2.0] - 2025-08-30
|
|
2
11
|
|
|
3
12
|
- Add ability to specify words to ignore during title case conversion
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Project
|
|
6
|
+
|
|
7
|
+
Apacify is a Ruby gem that converts strings to APA-style title case. It extends `String` with `.apacify(ignore: [])`.
|
|
8
|
+
|
|
9
|
+
## Commands
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
rake # Run tests + linter (default)
|
|
13
|
+
rake test # Tests only (minitest)
|
|
14
|
+
rake standard # Lint only (Standard/RuboCop)
|
|
15
|
+
ruby -Ilib test/test_apacify.rb # Run test file directly
|
|
16
|
+
ruby -Ilib test/test_apacify.rb -n test_method_name # Single test
|
|
17
|
+
bin/console # IRB with gem loaded
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Architecture
|
|
21
|
+
|
|
22
|
+
Entry point: `lib/apacify.rb` — defines the `MINOR_WORDS`, `PREFIXES`, and punctuation-pattern constants, defines `Apacify.titleize`, patches `String#apacify`.
|
|
23
|
+
|
|
24
|
+
Pipeline: **input string → Tokenizer → Token[] → Titleizer → output string**
|
|
25
|
+
|
|
26
|
+
- `Tokenizer` splits on word boundaries (spaces + punctuation), implements `Enumerable`
|
|
27
|
+
- `Token` represents a single unit; knows if it's a minor word, punctuation, first/last, Roman numeral, hyphenated
|
|
28
|
+
- `Titleizer` walks tokens and applies capitalization rules via `should_capitalize?`
|
|
29
|
+
|
|
30
|
+
## APA Title Case Rules
|
|
31
|
+
|
|
32
|
+
1. Always capitalize first word and words after sentence-ending punctuation (`:`, `.`, `!`, `?`, `—`)
|
|
33
|
+
2. Capitalize all major words (4+ letters always qualify)
|
|
34
|
+
3. Minor words (≤3 letters, listed in the `MINOR_WORDS` constant in `lib/apacify.rb`) stay lowercase unless rule 1 applies
|
|
35
|
+
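4. Hyphenated parts are capitalized individually, except that the part following a recognized prefix (e.g., `mid-`, `pre-`) is lowercased unless the input already capitalizes it (e.g., `Mid-century`, `Pre-Christian`)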
|
|
36
|
+
5. `ignore:` parameter preserves original case (case-sensitive matching)
|
data/lib/apacify/titleizer.rb
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
require "yaml"
|
|
2
|
-
|
|
3
1
|
module Apacify
|
|
4
2
|
class Titleizer
|
|
5
3
|
attr_reader :tokens, :ignore
|
|
@@ -27,7 +25,7 @@ module Apacify
|
|
|
27
25
|
return false if ignored_word?(token)
|
|
28
26
|
|
|
29
27
|
token.first? ||
|
|
30
|
-
tokens.
|
|
28
|
+
tokens.previous_punctuation(token)&.sentence_ending_punctuation? ||
|
|
31
29
|
!token.minor_word? ||
|
|
32
30
|
token.long?
|
|
33
31
|
end
|
|
@@ -39,12 +37,8 @@ module Apacify
|
|
|
39
37
|
token_string = token.string.strip
|
|
40
38
|
|
|
41
39
|
ignore.any? do |ignore_word|
|
|
42
|
-
|
|
43
|
-
if token_string == ignore_word
|
|
44
|
-
return true
|
|
45
|
-
end
|
|
40
|
+
return true if token_string == ignore_word
|
|
46
41
|
|
|
47
|
-
# Check if ignore_word contains punctuation and token matches the word part (case-sensitive)
|
|
48
42
|
if ignore_word.match?(/[.!?:—()]/)
|
|
49
43
|
word_part = ignore_word.gsub(/[.!?:—()]+/, "")
|
|
50
44
|
if token_string == word_part
|
data/lib/apacify/token.rb
CHANGED
|
@@ -1,20 +1,40 @@
|
|
|
1
|
-
require "forwardable"
|
|
2
|
-
|
|
3
1
|
module Apacify
|
|
4
2
|
class Token
|
|
5
|
-
extend Forwardable
|
|
6
|
-
|
|
7
3
|
attr_reader :string, :index
|
|
8
4
|
|
|
9
|
-
def_delegator :string, :downcase
|
|
10
|
-
|
|
11
5
|
def initialize(string, index)
|
|
12
6
|
@string = string
|
|
13
7
|
@index = index
|
|
14
8
|
end
|
|
15
9
|
|
|
16
10
|
def capitalize_word_parts
|
|
17
|
-
string.
|
|
11
|
+
parts = string.split("-", -1)
|
|
12
|
+
after_prefix = false
|
|
13
|
+
parts.map! do |part|
|
|
14
|
+
word = part[/\w+/]
|
|
15
|
+
unless word
|
|
16
|
+
after_prefix = false
|
|
17
|
+
next part
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
prefix = part[0, part.index(word)]
|
|
21
|
+
suffix = part[(prefix.length + word.length)..]
|
|
22
|
+
|
|
23
|
+
capitalized = if all_caps?(word)
|
|
24
|
+
word
|
|
25
|
+
elsif roman_numeral?(word)
|
|
26
|
+
word.upcase
|
|
27
|
+
elsif after_prefix
|
|
28
|
+
capitalized?(word) ? word : word.downcase
|
|
29
|
+
else
|
|
30
|
+
word.downcase.capitalize
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
after_prefix ||= PREFIXES.include?(word.downcase)
|
|
34
|
+
|
|
35
|
+
"#{prefix}#{capitalized}#{suffix}"
|
|
36
|
+
end
|
|
37
|
+
parts.join("-")
|
|
18
38
|
end
|
|
19
39
|
|
|
20
40
|
def first?
|
|
@@ -38,7 +58,7 @@ module Apacify
|
|
|
38
58
|
end
|
|
39
59
|
|
|
40
60
|
def sentence_ending_punctuation?
|
|
41
|
-
string.match?(
|
|
61
|
+
string.match?(PUNCTUATION_PATTERN)
|
|
42
62
|
end
|
|
43
63
|
|
|
44
64
|
def to_s
|
|
@@ -46,7 +66,21 @@ module Apacify
|
|
|
46
66
|
end
|
|
47
67
|
|
|
48
68
|
def whitespace_or_punctuation?
|
|
49
|
-
string.match?(/\s
|
|
69
|
+
string.match?(/\A(?:\s|#{PUNCTUATION_CHARS})+\s*\z/o)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
private
|
|
73
|
+
|
|
74
|
+
def all_caps?(word)
|
|
75
|
+
word.match?(/\A[A-Z]+\z/)
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
def capitalized?(word)
|
|
79
|
+
word[0] == word[0].upcase
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def roman_numeral?(word)
|
|
83
|
+
word.match?(/\A(?:M{0,4}(?:CM|CD|D?C{0,3})(?:XC|XL|L?X{0,3})(?:IX|IV|V?I{0,3}))\z/i)
|
|
50
84
|
end
|
|
51
85
|
end
|
|
52
86
|
end
|
data/lib/apacify/tokenizer.rb
CHANGED
|
@@ -2,13 +2,13 @@ module Apacify
|
|
|
2
2
|
class Tokenizer
|
|
3
3
|
include Enumerable
|
|
4
4
|
|
|
5
|
-
attr_reader :tokens
|
|
5
|
+
attr_reader :tokens
|
|
6
6
|
|
|
7
7
|
def initialize(string)
|
|
8
8
|
@tokens = string
|
|
9
9
|
.split(word_boundary_pattern)
|
|
10
10
|
.map
|
|
11
|
-
.with_index(
|
|
11
|
+
.with_index { |token, index| Token.new(token, index) }
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
def [](index)
|
|
@@ -21,18 +21,21 @@ module Apacify
|
|
|
21
21
|
tokens.each(&block)
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
-
def
|
|
25
|
-
|
|
24
|
+
def previous_punctuation(token)
|
|
25
|
+
index = token.index - 1
|
|
26
|
+
while index >= 0
|
|
27
|
+
prev_token = tokens[index]
|
|
28
|
+
return prev_token if prev_token.sentence_ending_punctuation?
|
|
29
|
+
break unless prev_token.whitespace_or_punctuation?
|
|
30
|
+
index -= 1
|
|
31
|
+
end
|
|
32
|
+
nil
|
|
26
33
|
end
|
|
27
34
|
|
|
28
35
|
private
|
|
29
36
|
|
|
30
|
-
def instantiate
|
|
31
|
-
->(token, index) { Token.new(token, index) }
|
|
32
|
-
end
|
|
33
|
-
|
|
34
37
|
def word_boundary_pattern
|
|
35
|
-
|
|
38
|
+
WORD_BOUNDARY_PATTERN
|
|
36
39
|
end
|
|
37
40
|
end
|
|
38
41
|
end
|
data/lib/apacify/version.rb
CHANGED
data/lib/apacify.rb
CHANGED
|
@@ -1,13 +1,25 @@
|
|
|
1
|
-
require_relative "apacify/error"
|
|
2
1
|
require_relative "apacify/titleizer"
|
|
3
2
|
require_relative "apacify/token"
|
|
4
3
|
require_relative "apacify/tokenizer"
|
|
5
4
|
require_relative "apacify/version"
|
|
6
5
|
|
|
7
6
|
module Apacify
|
|
8
|
-
MINOR_WORDS =
|
|
9
|
-
|
|
10
|
-
|
|
7
|
+
MINOR_WORDS = %w[
|
|
8
|
+
a an and as at but by de for from
|
|
9
|
+
if in nor of off on or out pas per
|
|
10
|
+
so the to up via with yet
|
|
11
|
+
].freeze
|
|
12
|
+
|
|
13
|
+
PREFIXES = %w[
|
|
14
|
+
anti co counter ex extra infra inter intra
|
|
15
|
+
macro mega meta micro mid mini multi neo
|
|
16
|
+
non over post pre pro proto pseudo quasi
|
|
17
|
+
re semi sub super supra trans ultra un under
|
|
18
|
+
].to_set.freeze
|
|
19
|
+
|
|
20
|
+
PUNCTUATION_CHARS = '[.!?:—()\[\]]'
|
|
21
|
+
PUNCTUATION_PATTERN = /#{PUNCTUATION_CHARS}+\s*/
|
|
22
|
+
WORD_BOUNDARY_PATTERN = /(\s+|#{PUNCTUATION_CHARS}+\s*)/
|
|
11
23
|
|
|
12
24
|
def self.titleize(string, ignore: [])
|
|
13
25
|
Titleizer.new(string, ignore:).titleize
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: apacify
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ariel Rzezak
|
|
@@ -19,13 +19,12 @@ extra_rdoc_files: []
|
|
|
19
19
|
files:
|
|
20
20
|
- ".standard.yml"
|
|
21
21
|
- CHANGELOG.md
|
|
22
|
+
- CLAUDE.md
|
|
22
23
|
- CODE_OF_CONDUCT.md
|
|
23
24
|
- LICENSE.txt
|
|
24
25
|
- README.md
|
|
25
26
|
- Rakefile
|
|
26
|
-
- config/minor.yml
|
|
27
27
|
- lib/apacify.rb
|
|
28
|
-
- lib/apacify/error.rb
|
|
29
28
|
- lib/apacify/titleizer.rb
|
|
30
29
|
- lib/apacify/token.rb
|
|
31
30
|
- lib/apacify/tokenizer.rb
|
|
@@ -51,7 +50,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
51
50
|
- !ruby/object:Gem::Version
|
|
52
51
|
version: '0'
|
|
53
52
|
requirements: []
|
|
54
|
-
rubygems_version:
|
|
53
|
+
rubygems_version: 4.0.3
|
|
55
54
|
specification_version: 4
|
|
56
55
|
summary: Convert strings to title case following APA style guidelines.
|
|
57
56
|
test_files: []
|
data/config/minor.yml
DELETED
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
- "a"
|
|
2
|
-
- "an"
|
|
3
|
-
- "and"
|
|
4
|
-
- "as"
|
|
5
|
-
- "at"
|
|
6
|
-
- "but"
|
|
7
|
-
- "by"
|
|
8
|
-
- "de"
|
|
9
|
-
- "for"
|
|
10
|
-
- "for"
|
|
11
|
-
- "from"
|
|
12
|
-
- "if"
|
|
13
|
-
- "in"
|
|
14
|
-
- "nor"
|
|
15
|
-
- "of"
|
|
16
|
-
- "off"
|
|
17
|
-
- "on"
|
|
18
|
-
- "or"
|
|
19
|
-
- "out"
|
|
20
|
-
- "pas"
|
|
21
|
-
- "per"
|
|
22
|
-
- "so"
|
|
23
|
-
- "the"
|
|
24
|
-
- "to"
|
|
25
|
-
- "up"
|
|
26
|
-
- "via"
|
|
27
|
-
- "with"
|
|
28
|
-
- "yet"
|
data/lib/apacify/error.rb
DELETED