cjk_auto_space 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 46f97e06e96ab1a04cb91ef23adac2300581af29d99b40485e98dd5059d6ba9b
4
+ data.tar.gz: 9a0d0d19b838185be702124b44c2f3a3c4a26a75036daf5195e3a6e96182d3ff
5
+ SHA512:
6
+ metadata.gz: f60f7c271f76cffa17ddfa7b213303ed5de88b699e891978372c27d181ef610f68443d2352692c8bb78a22826e00a3a9f6903261bf9c941094cdc05af7b79ad2
7
+ data.tar.gz: 3004cea02af4e227af18a0c7be436e844f638e49f80f6f156b2c7d799604c4c6f105540e69f99a3dceab01311cea69643d3c2f8ef48f38627f4c2aa79f882ec1
data/README.md ADDED
@@ -0,0 +1,8 @@
1
+ rubygem-cjk_auto_space
2
+ ------
3
+
4
+ A rubygem that automatically adds a space (U+0020) between CJK and other Unicode code points for a better reading experience, taking Markdown syntax into account.
5
+
6
+ Usage:
7
+
8
+ "你好world哈哈".cjk_auto_space # => "你好 world 哈哈"
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ require "rake/extensiontask"
2
+ require "rake/testtask"
3
+
4
+ Rake::ExtensionTask.new "cjk_auto_space" do |ext|
5
+ ext.lib_dir = "lib/cjk_auto_space"
6
+ end
7
+
8
+ Rake::TestTask.new do |t|
9
+ t.libs << "test"
10
+ end
11
+
12
+ desc "Run tests"
13
+ task default::test
@@ -0,0 +1,177 @@
1
+ /*
2
+ gcc ./cjkpad.c -o cjkpad `pkg-config --libs --cflags icu-uc icu-io`
3
+ */
4
+ #include <stdio.h>
5
+ #include <stdlib.h>
6
+ #include <string.h>
7
+ #include <unicode/utext.h>
8
+ /*#include <unicode/ustdio.h>*/
9
+ #include "ruby.h"
10
+ #include "extconf.h"
11
+
12
+ static UErrorCode err = U_ZERO_ERROR;
13
+
14
+ UBool u_iscjk(int ublock);
15
+
16
+ typedef struct gra { // one code point of the input plus its location in the UTF-8 source
17
+ UChar32 cp; // code point
18
+ UBlockCode block; // Unicode block of cp, as returned by ublock_getCode()
19
+ size_t start; // offset of the first char of this code point in str[]
20
+ size_t length; // number of char needed (CJK is mostly 3 char)
21
+ bool cjk; // result of u_iscjk() for block
22
+ } gra;
23
+
24
+ UBool u_isMarkdown(gra* g, bool prefix);
25
+
26
+ char* padCjk(const char* str) {
27
+ char* formatted = calloc(1024, sizeof(char));
28
+
29
+ gra* graphemes = malloc(1024 * sizeof(gra));
30
+ int graLen; // number of graphemes
31
+
32
+ UText *ut = utext_openUTF8(NULL, str, -1, &err);
33
+
34
+ int begin = 0;
35
+ int end;
36
+ int i = 0;
37
+ int blk;
38
+ int filled = 0;
39
+
40
+ for (UChar32 cp = utext_next32From(ut, 0);
41
+ cp > -1;
42
+ cp = utext_next32(ut), i++) {
43
+ end = utext_getNativeIndex(ut);
44
+ graphemes[i].cp = cp;
45
+ blk = ublock_getCode(cp);
46
+ graphemes[i].block = blk;
47
+ graphemes[i].start = begin;
48
+ graphemes[i].length = end - begin;
49
+ graphemes[i].cjk = u_iscjk(blk);
50
+
51
+ begin = end;
52
+ }
53
+
54
+ graLen = i;
55
+
56
+ filled += graphemes[0].length;
57
+ strncat(formatted, &str[graphemes[0].start], graphemes[0].length);
58
+
59
+ for (int j = 1; j <= graLen; j++) {
60
+ gra *prev = &graphemes[j-1];
61
+ gra *curr = &graphemes[j];
62
+ gra *next = &graphemes[j+1];
63
+
64
+ //u_printf("%C%C%C\n", prev->cp, curr->cp, next->cp);
65
+
66
+ if ((u_isMarkdown(curr, true) && !prev->cjk && !u_isspace(prev->cp) && next->cjk) || // d*哈
67
+ (!prev->cjk && !u_isspace(prev->cp) && !u_isMarkdown(prev, true) && curr->cjk) || // *哈
68
+ (!curr->cjk && !u_isspace(curr->cp) && !u_isMarkdown(curr, false) && prev->cjk)) { // 哈*
69
+ filled++;
70
+ if (filled == strlen(formatted)-1) {
71
+ formatted = realloc(formatted, strlen(formatted) * 2 * sizeof(char));
72
+ }
73
+ strcat(formatted, " ");
74
+ }
75
+ filled += curr->length;
76
+ if (filled == strlen(formatted)-1) {
77
+ formatted = realloc(formatted, strlen(formatted) * 2 * sizeof(char));
78
+ }
79
+ strncat(formatted, &str[curr->start], curr->length);
80
+
81
+ if (u_isMarkdown(curr, false) && prev->cjk && !u_isspace(next->cp) && !next->cjk) { // 哈*n
82
+ filled++;
83
+ if (filled == strlen(formatted)-1) {
84
+ formatted = realloc(formatted, strlen(formatted) * 2 * sizeof(char));
85
+ }
86
+ strcat(formatted, " ");
87
+ }
88
+ }
89
+
90
+ //printf("\n%s\n", formatted);
91
+ free(graphemes);
92
+
93
+ return formatted;
94
+ }
95
+
96
+ /*
97
+  * Tell whether the code point of `g` is a Markdown marker.
98
+  * `_`, `*` and the backtick count on either side of the text. `>` and `[`
99
+  * count only when `prefix` is true (marker in front of the text), and `]`
100
+  * counts only when `prefix` is false (marker behind the text).
101
+  * Returns 1 for a marker and 0 otherwise.
102
+  */
103
+ UBool u_isMarkdown(gra* g, bool prefix) {
104
+     switch (g->cp) {
105
+     case 0x005F: // _
106
+     case 0x002A: // *
107
+     case 0x0060: // `
108
+         return 1;
109
+     case 0x003E: // >
110
+     case 0x005B: // [
111
+         // Return here on purpose: falling into the `]` case would report
112
+         // these opening markers as closing ones when prefix is false.
113
+         return prefix ? 1 : 0;
114
+     case 0x005D: // ]
115
+         return prefix ? 0 : 1;
116
+     default:
117
+         return 0;
118
+     }
119
+ }
122
+
123
+ /*
124
+  * Report whether `ublock` (a UBlockCode value) is one of the blocks this
125
+  * extension treats as CJK: Han ideographs, kana, bopomofo, hangul and the
126
+  * related symbol, radical, stroke and compatibility blocks listed below.
127
+  * Returns 1 for those blocks and 0 for everything else.
128
+  */
129
+ UBool u_iscjk(int ublock) {
130
+     static const int cjkBlocks[] = {
131
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS,
132
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
133
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
134
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
135
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
136
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
137
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
138
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
139
+         UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
140
+         UBLOCK_CJK_RADICALS_SUPPLEMENT,
141
+         UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION,
142
+         UBLOCK_HIRAGANA,
143
+         UBLOCK_KATAKANA,
144
+         UBLOCK_BOPOMOFO,
145
+         UBLOCK_BOPOMOFO_EXTENDED,
146
+         UBLOCK_KANBUN,
147
+         UBLOCK_CJK_STROKES,
148
+         UBLOCK_KATAKANA_PHONETIC_EXTENSIONS,
149
+         UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS,
150
+         UBLOCK_CJK_COMPATIBILITY,
151
+         UBLOCK_HANGUL_JAMO,
152
+         UBLOCK_HANGUL_COMPATIBILITY_JAMO,
153
+         UBLOCK_HANGUL_JAMO_EXTENDED_A,
154
+         UBLOCK_HANGUL_SYLLABLES,
155
+         UBLOCK_HANGUL_JAMO_EXTENDED_B,
156
+         UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS,
157
+         UBLOCK_CJK_COMPATIBILITY_FORMS,
158
+         UBLOCK_KANA_EXTENDED_B,
159
+         UBLOCK_KANA_SUPPLEMENT,
160
+         UBLOCK_KANA_EXTENDED_A,
161
+         UBLOCK_SMALL_KANA_EXTENSION,
162
+     };
163
+     const size_t count = sizeof cjkBlocks / sizeof cjkBlocks[0];
164
+
165
+     for (size_t k = 0; k < count; k++) {
166
+         if (cjkBlocks[k] == ublock) {
167
+             return 1;
168
+         }
169
+     }
170
+     return 0;
171
+ }
166
+
167
+ /*
168
+  * Implementation of String#cjk_auto_space: returns a new UTF-8 string holding
169
+  * the receiver's text with spaces added by padCjk(). The receiver is not
170
+  * modified. Raises RuntimeError when padCjk() cannot produce a result.
171
+  */
172
+ static VALUE rb_pad_cjk(VALUE self) {
173
+     Check_Type(self, T_STRING);
174
+
175
+     char *in = StringValueCStr(self);
176
+     char *out = padCjk(in);
177
+     if (out == NULL) {
178
+         rb_raise(rb_eRuntimeError, "cjk_auto_space: unable to pad string");
179
+     }
180
+
181
+     // Tag the result as UTF-8; rb_str_new_cstr would label it ASCII-8BIT, so
182
+     // it would not compare equal to the same text written as a UTF-8 literal.
183
+     VALUE result = rb_utf8_str_new_cstr(out);
184
+     free(out); // the Ruby string holds its own copy of the bytes
185
+     return result;
186
+ }
174
+
175
+ void Init_cjk_auto_space(void) { // extension entry point; presumably run by Ruby when cjk_auto_space is required
176
+ rb_define_method(rb_cString, "cjk_auto_space", rb_pad_cjk, 0); // adds String#cjk_auto_space, which takes no arguments
177
+ }
@@ -0,0 +1,10 @@
1
+ require 'mkmf'
2
+
3
+ dir_config("/usr/lib64", "/usr/lib", "/usr/include/unicode") # NOTE(review): dir_config normally takes a package name first (e.g. "icu"), then include and lib defaults; a path as the name looks unintended, confirm
4
+
5
+ abort("missing \"unicode/utext.h\"") unless find_header("unicode/utext.h") # stop when the ICU header is not found
6
+ find_library("icuuc", "utext_openUTF8", "/usr/lib64", "/usr/lib") # result is ignored here; the next line does the fatal check
7
+ abort("missing \"utext_openUTF8\"") unless have_library("icuuc", "utext_openUTF8", "unicode/utext.h")
8
+
9
+ create_header # the C source includes "extconf.h"
10
+ create_makefile 'cjk_auto_space/cjk_auto_space'
@@ -0,0 +1,12 @@
1
+ begin
2
+ ruby_version = /(\d+\.\d+)/.match(::RUBY_VERSION)
3
+ require_relative "cjk_auto_space/#{ruby_version}/cjk_auto_space"
4
+ rescue LoadError
5
+ require "cjk_auto_space/cjk_auto_space"
6
+ end
7
+
8
+ String.class_eval do
9
+ def cjk_auto_space!
10
+ sub!(self, self.cjk_auto_space)
11
+ end
12
+ end
metadata ADDED
@@ -0,0 +1,52 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cjk_auto_space
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Marguerite Su
8
+ - Shenlebantongying
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2023-01-22 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: A rubygem that automatically adds a space (U+0020) between CJK and other
15
+ Unicode code points for a better reading experience, taking Markdown syntax into account.
16
+ email:
17
+ - marguerite@opensuse.org
18
+ - shenlebantongying@gmail.com
19
+ executables: []
20
+ extensions:
21
+ - ext/cjk_auto_space/extconf.rb
22
+ extra_rdoc_files: []
23
+ files:
24
+ - README.md
25
+ - Rakefile
26
+ - ext/cjk_auto_space/cjk_auto_space.c
27
+ - ext/cjk_auto_space/extconf.rb
28
+ - lib/cjk_auto_space.rb
29
+ homepage: https://github.com/openSUSE-zh/rubygem-cjk_auto_space
30
+ licenses:
31
+ - MIT
32
+ metadata: {}
33
+ post_install_message:
34
+ rdoc_options: []
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ version: '0'
42
+ required_rubygems_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ requirements: []
48
+ rubygems_version: 3.4.1
49
+ signing_key:
50
+ specification_version: 4
51
+ summary: Automatically add spacing between CJK and others with MarkDown syntax concerned
52
+ test_files: []