xlsform2lstsv 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +228 -0
- package/dist/config/ConfigManager.js +43 -0
- package/dist/config/types.js +13 -0
- package/dist/converters/xpathTranspiler.js +403 -0
- package/dist/generateFixtures.js +123 -0
- package/dist/index.js +10 -0
- package/dist/processors/FieldSanitizer.js +21 -0
- package/dist/processors/TSVGenerator.js +52 -0
- package/dist/processors/TypeMapper.js +55 -0
- package/dist/processors/XLSFormParser.js +32 -0
- package/dist/processors/XLSLoader.js +109 -0
- package/dist/processors/XLSValidator.js +121 -0
- package/dist/utils/helpers.js +42 -0
- package/dist/utils/languageUtils.js +141 -0
- package/dist/xlsformConverter.js +721 -0
- package/package.json +76 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @file Utility functions for handling multiple language support in XLSForms
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Lowercase 2-letter language codes used for validation purposes.
 * This is a subset of the codes found in the IANA language subtag registry,
 * listed alphabetically; each row below holds space-separated codes.
 */
const VALID_LANGUAGE_CODES = new Set([
    'aa ab ae af ak am an ar as av',
    'ay az ba be bg bh bi bm bn bo',
    'br bs ca ce ch co cr cs cu cv',
    'cy da de dv dz ee el en eo es',
    'et eu fa ff fi fj fo fr fy ga',
    'gd gl gn gu gv ha he hi ho hr',
    'ht hu hy hz ia id ie ig ii ik',
    'io is it iu ja jv ka kg ki kj',
    'kk kl km kn ko kr ks ku kv kw',
    'ky la lb lg li ln lo lt lu lv',
    'mg mh mi mk ml mn mr ms mt my',
    'na nb nd ne ng nl nn no nr nv',
    'ny oc oj om or os pa pi pl ps',
    'pt qu rm rn ro ru rw sa sc sd',
    'se sg si sk sl sm sn so sq sr',
    'ss st su sv sw ta te tg th ti',
    'tk tl tn to tr ts tt tw ty ug',
    'uk ur uz ve vi vo wa wo xh yi',
    'yo za zh zu',
].join(' ').split(' '));
|
|
29
|
+
/**
 * Extract the 2-letter language code from a column header.
 *
 * Two header shapes are recognised, tried in this order:
 *   1. "label::English (en)" / "label::Español (es)" - code in parentheses after a language name
 *   2. "label::en" - bare code directly after the "::" separator
 *
 * @param header Column header to inspect; any non-string value yields null
 * @returns Lowercased 2-letter code, or null when no code is found
 */
export function extractLanguageCode(header) {
    // Non-string input has no .match(); report "no code" instead of throwing a TypeError.
    if (typeof header !== 'string') {
        return null;
    }
    // Pattern: label::Language Name (code)
    const match = header.match(/::\s*[^)]+\(([a-z]{2})\)/i);
    if (match && match[1]) {
        return match[1].toLowerCase();
    }
    // Fallback: label::code (without parentheses). The trailing \b rejects
    // longer words, so "label::English" does not yield "en".
    const simpleMatch = header.match(/::\s*([a-z]{2})\b/i);
    if (simpleMatch && simpleMatch[1]) {
        return simpleMatch[1].toLowerCase();
    }
    return null;
}
|
|
46
|
+
/**
 * Return the part of a header that precedes the first "::" separator,
 * with surrounding whitespace removed
 * (e.g., "label::English (en)" -> "label"). A header without "::" is
 * returned trimmed and otherwise unchanged.
 */
export function extractBaseColumnName(header) {
    const separatorAt = header.indexOf('::');
    const base = separatorAt === -1 ? header : header.slice(0, separatorAt);
    return base.trim();
}
|
|
53
|
+
/**
 * Collect the distinct language codes found in a list of headers.
 * Headers without a language code are ignored; codes are returned in the
 * order in which they are first encountered.
 */
export function getLanguageCodesFromHeaders(headers) {
    const foundCodes = [...headers]
        .map((header) => extractLanguageCode(header))
        .filter((code) => Boolean(code));
    return [...new Set(foundCodes)];
}
|
|
66
|
+
/**
 * Look up the value stored in a row for a given base column and language code.
 * Returns the value of the first row key whose base column name and language
 * code both match, or undefined when no key matches.
 */
export function getLanguageSpecificValue(row, baseColumn, languageCode) {
    const matchingEntry = Object.entries(row).find(([key]) => {
        return extractLanguageCode(key) === languageCode
            && extractBaseColumnName(key) === baseColumn;
    });
    return matchingEntry === undefined ? undefined : matchingEntry[1];
}
|
|
79
|
+
/**
 * Get all language-specific values for a base column.
 *
 * Builds an object keyed by language code from every row key whose base
 * column name equals `baseColumn` and whose header carries a language code.
 * Blank values (undefined, null, empty string, NaN) are skipped. Other falsy
 * values such as the number 0 or `false` are kept, because they are real
 * cell contents rather than missing data.
 *
 * @param row Object mapping column headers to cell values
 * @param baseColumn Base column name to match (e.g., "label")
 * @returns Object mapping language code to the value found for that language
 */
export function getAllLanguageValues(row, baseColumn) {
    const result = {};
    for (const [key, value] of Object.entries(row)) {
        if (extractBaseColumnName(key) !== baseColumn) {
            continue;
        }
        const headerCode = extractLanguageCode(key);
        if (!headerCode) {
            continue;
        }
        // Explicit blank check instead of truthiness so that 0 / false survive.
        const isBlank = value === undefined
            || value === null
            || value === ''
            || Number.isNaN(value);
        if (!isBlank) {
            result[headerCode] = value;
        }
    }
    return result;
}
|
|
93
|
+
/**
 * Tell whether a header carries a language code, i.e. whether
 * extractLanguageCode returns something other than null for it.
 */
export function isLanguageSpecificHeader(header) {
    const detectedCode = extractLanguageCode(header);
    const hasLanguage = detectedCode !== null;
    return hasLanguage;
}
|
|
99
|
+
/**
 * Check a language code against the module's list of known 2-letter codes.
 * The comparison ignores letter case and surrounding whitespace.
 * @param code Language code to validate (e.g., 'en', 'es')
 * @returns true when the normalized code is a known 2-letter code, false
 *          otherwise (including for non-string or empty input)
 */
export function isValidLanguageCode(code) {
    if (typeof code !== 'string' || code === '') {
        return false;
    }
    const candidate = code.toLowerCase().trim();
    // Exactly two ASCII letters first, then membership in the known-code set.
    const looksLikeCode = candidate.length === 2 && /^[a-z]{2}$/.test(candidate);
    return looksLikeCode && VALID_LANGUAGE_CODES.has(candidate);
}
|
|
118
|
+
/**
 * Pick out the language codes that fail validation.
 * @param languageCodes Array of language codes to validate
 * @returns New array holding, in their original order, the codes for which
 *          isValidLanguageCode returns false
 */
export function validateLanguageCodes(languageCodes) {
    const rejected = [];
    languageCodes.forEach((code) => {
        if (!isValidLanguageCode(code)) {
            rejected.push(code);
        }
    });
    return rejected;
}
|
|
126
|
+
/**
 * Get the base language from settings (fallback to 'en').
 *
 * `settings.default_language` is interpreted as follows, in order:
 *   1. "Spanish (es)" / "English (en)" - 2-letter code in parentheses
 *   2. "es" - a bare 2-letter code (surrounding whitespace ignored)
 *   3. anything else is passed to extractLanguageCode
 * If none of these yields a code, or if settings / default_language is
 * missing or not a string, 'en' is returned.
 *
 * @param settings Settings object, may be null or undefined
 * @returns Lowercased 2-letter language code
 */
export function getBaseLanguage(settings) {
    const defaultLanguage = settings == null ? undefined : settings.default_language;
    if (typeof defaultLanguage !== 'string' || defaultLanguage === '') {
        return 'en';
    }
    // Try to extract language code from formats like "Spanish (es)" or "English (en)"
    const parenthesised = defaultLanguage.match(/\(([a-z]{2})\)/i);
    if (parenthesised && parenthesised[1]) {
        return parenthesised[1].toLowerCase();
    }
    // A bare code such as "es" has no "::" prefix, so extractLanguageCode
    // would not recognise it; handle it here before falling back.
    const bareCode = defaultLanguage.trim();
    if (/^[a-z]{2}$/i.test(bareCode)) {
        return bareCode.toLowerCase();
    }
    // Fallback to extractLanguageCode for other formats
    return extractLanguageCode(defaultLanguage) || 'en';
}
|