xlsform2lstsv 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ /**
2
+ * @file Utility functions for handling multiple language support in XLSForms
3
+ */
/**
 * Two-letter language codes accepted by the validators in this module.
 * The list is a subset of the language subtags found in the IANA language
 * subtag registry; each row below holds up to ten codes in alphabetical order.
 */
const VALID_LANGUAGE_CODES = new Set(
    [
        'aa ab ae af ak am an ar as av',
        'ay az ba be bg bh bi bm bn bo',
        'br bs ca ce ch co cr cs cu cv',
        'cy da de dv dz ee el en eo es',
        'et eu fa ff fi fj fo fr fy ga',
        'gd gl gn gu gv ha he hi ho hr',
        'ht hu hy hz ia id ie ig ii ik',
        'io is it iu ja jv ka kg ki kj',
        'kk kl km kn ko kr ks ku kv kw',
        'ky la lb lg li ln lo lt lu lv',
        'mg mh mi mk ml mn mr ms mt my',
        'na nb nd ne ng nl nn no nr nv',
        'ny oc oj om or os pa pi pl ps',
        'pt qu rm rn ro ru rw sa sc sd',
        'se sg si sk sl sm sn so sq sr',
        'ss st su sv sw ta te tg th ti',
        'tk tl tn to tr ts tt tw ty ug',
        'uk ur uz ve vi vo wa wo xh yi',
        'yo za zh zu'
    ].join(' ').split(' ')
);
/**
 * Extract the two-letter language code from a column header.
 *
 * Recognised forms (matched case-insensitively, code returned in lowercase):
 *   - "label::English (en)" -> "en"
 *   - "label::(en)"         -> "en"
 *   - "label::en"           -> "en"
 *
 * @param {string} header Column header to inspect.
 * @returns {string|null} The lowercase code, or null when `header` is not a
 *   string or carries no recognisable code after "::".
 */
export function extractLanguageCode(header) {
    // Guard against non-string headers so callers get null instead of a TypeError.
    if (typeof header !== 'string') {
        return null;
    }
    // "::<optional language name>(<code>)". The name part uses `*` so that a
    // header with only the parenthesised code still matches.
    const named = header.match(/::\s*[^)]*\(([a-z]{2})\)/i);
    if (named) {
        return named[1].toLowerCase();
    }
    // Bare code right after "::"; the \b rejects longer words such as "::English".
    const bare = header.match(/::\s*([a-z]{2})\b/i);
    return bare ? bare[1].toLowerCase() : null;
}
/**
 * Return the part of a header that precedes the first "::" separator, with
 * surrounding whitespace removed (e.g. "label::English (en)" -> "label").
 * A header without "::" is returned trimmed.
 */
export function extractBaseColumnName(header) {
    const separatorAt = header.indexOf('::');
    const base = separatorAt === -1 ? header : header.slice(0, separatorAt);
    return base.trim();
}
/**
 * Collect the distinct language codes that appear in a list of headers.
 * Headers without a code are ignored; codes are returned in the order in
 * which they are first seen.
 */
export function getLanguageCodesFromHeaders(headers) {
    const found = [...headers]
        .map((header) => extractLanguageCode(header))
        .filter((code) => code);
    // A Set drops repeats while keeping first-seen order.
    return [...new Set(found)];
}
/**
 * Look up the value stored under the first row key whose base column name
 * equals `baseColumn` and whose language code equals `languageCode`.
 * Returns undefined when no key matches.
 */
export function getLanguageSpecificValue(row, baseColumn, languageCode) {
    const hit = Object.entries(row).find(([header]) =>
        extractLanguageCode(header) === languageCode &&
        extractBaseColumnName(header) === baseColumn);
    return hit ? hit[1] : undefined;
}
/**
 * Gather every language-specific value stored for one base column.
 *
 * @param {Object} row Row object keyed by column header.
 * @param {string} baseColumn Base column name, e.g. "label".
 * @returns {Object} Map of lowercase language code -> cell value. Blank
 *   cells (undefined, null, empty string, NaN) are left out.
 */
export function getAllLanguageValues(row, baseColumn) {
    const result = {};
    for (const [key, value] of Object.entries(row)) {
        // Skip only genuinely blank cells. A plain truthiness test would also
        // discard legitimate values such as the number 0 or the boolean false.
        if (value == null || value === '' || Number.isNaN(value)) {
            continue;
        }
        if (extractBaseColumnName(key) !== baseColumn) {
            continue;
        }
        const headerCode = extractLanguageCode(key);
        if (headerCode) {
            result[headerCode] = value;
        }
    }
    return result;
}
/**
 * Tell whether a header carries a language code, i.e. whether
 * extractLanguageCode() yields something other than null for it.
 */
export function isLanguageSpecificHeader(header) {
    const code = extractLanguageCode(header);
    return code !== null;
}
/**
 * Check a language code against the module's list of known two-letter codes.
 * The input is lowercased and trimmed before the check.
 * @param code Language code to validate (e.g., 'en', 'es')
 * @returns true if valid, false otherwise (including for non-string input)
 */
export function isValidLanguageCode(code) {
    if (typeof code !== 'string') {
        return false;
    }
    const candidate = code.toLowerCase().trim();
    // The pattern enforces both rules at once: exactly two characters, letters only.
    return /^[a-z]{2}$/.test(candidate) && VALID_LANGUAGE_CODES.has(candidate);
}
/**
 * Pick out the entries of a list that fail isValidLanguageCode().
 * @param languageCodes Array of language codes to validate
 * @returns Array holding only the invalid codes, in their original order
 */
export function validateLanguageCodes(languageCodes) {
    const isInvalid = (code) => !isValidLanguageCode(code);
    return languageCodes.filter(isInvalid);
}
/**
 * Determine the base language code from the form settings.
 *
 * Accepted shapes of `settings.default_language`:
 *   - "Spanish (es)" -> "es"   (code in parentheses)
 *   - "es" / " ES "  -> "es"   (bare two-letter code)
 *   - anything extractLanguageCode() understands (e.g. "label::es")
 *
 * @param {Object} settings Settings object; may be null/undefined.
 * @returns {string} Lowercase two-letter code, 'en' when nothing usable is found.
 */
export function getBaseLanguage(settings) {
    const defaultLanguage = settings ? settings.default_language : undefined;
    if (!defaultLanguage || typeof defaultLanguage !== 'string') {
        return 'en';
    }
    // Formats like "Spanish (es)" or "English (en)".
    const parenthesised = defaultLanguage.match(/\(([a-z]{2})\)/i);
    if (parenthesised) {
        return parenthesised[1].toLowerCase();
    }
    // A bare code such as "es": extractLanguageCode() requires a "::" prefix
    // and would miss it, silently falling back to 'en'.
    const bare = defaultLanguage.trim();
    if (/^[a-z]{2}$/i.test(bare)) {
        return bare.toLowerCase();
    }
    // Remaining header-style formats.
    return extractLanguageCode(defaultLanguage) || 'en';
}