sfc-utils 1.4.187 → 1.4.188
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/copy/docs.js +173 -171
- package/copy/sheets.js +5 -3
- package/package.json +1 -1
package/copy/docs.js
CHANGED
|
@@ -72,9 +72,11 @@ let grabDocs = (
|
|
|
72
72
|
.get({
|
|
73
73
|
fileId,
|
|
74
74
|
})
|
|
75
|
-
.catch(() => {
|
|
75
|
+
.catch((err) => {
|
|
76
76
|
// Maybe the service account doesn't have permissions -- try with normal token
|
|
77
|
-
|
|
77
|
+
console.error("Error getting file metadata for fileId:", fileId);
|
|
78
|
+
console.error("Full error:", err);
|
|
79
|
+
reject(err);
|
|
78
80
|
});
|
|
79
81
|
if (!meta) {
|
|
80
82
|
return;
|
|
@@ -96,185 +98,185 @@ let grabDocs = (
|
|
|
96
98
|
//build file path
|
|
97
99
|
var file_path = `${directory}${filename}.json`;
|
|
98
100
|
|
|
99
|
-
drive.files.export(
|
|
100
|
-
|
|
101
|
-
docHtml
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
101
|
+
drive.files.export(
|
|
102
|
+
{ fileId: fileId, mimeType: "text/html" },
|
|
103
|
+
function (err, docHtml) {
|
|
104
|
+
//Here's the parser
|
|
105
|
+
var handler = new htmlparser.DomHandler(function (error, dom) {
|
|
106
|
+
//First, we establish the tag handlers object that will parse
|
|
107
|
+
//different tags in different ways
|
|
108
|
+
//zoom down below the tagHandlers object...
|
|
109
|
+
var tagHandlers = {
|
|
110
|
+
//the whole body of the doc comes thru here and is passed thru the _base key
|
|
111
|
+
_base: function (tag) {
|
|
112
|
+
var str = "";
|
|
113
|
+
//for each tag in the body...
|
|
114
|
+
tag.children.forEach(function (child) {
|
|
115
|
+
//...if the tag is in the tagHandlers object, run the function
|
|
116
|
+
//we do this because there are lots of tags that we don't want to parse
|
|
117
|
+
if ((func = tagHandlers[child.name || child.type]))
|
|
118
|
+
// console.log('LOG 4: printing the child ***********************')
|
|
119
|
+
// console.log(child)
|
|
120
|
+
|
|
121
|
+
str += func(child);
|
|
122
|
+
// console.log("start of a string ***********************")
|
|
123
|
+
// console.log(str)
|
|
124
|
+
});
|
|
125
|
+
return str;
|
|
126
|
+
},
|
|
127
|
+
text: function (textTag) {
|
|
128
|
+
// console.log('LOG 6: and finally a text tag ***********************')
|
|
129
|
+
// console.log(textTag)
|
|
130
|
+
|
|
131
|
+
var styledTag = textTag.data;
|
|
132
|
+
if (textTag.parent.attribs.style !== undefined) {
|
|
133
|
+
if (
|
|
134
|
+
textTag.parent.attribs.style.includes("font-style:italic")
|
|
135
|
+
) {
|
|
136
|
+
styledTag = "<i>" + styledTag + "</i>";
|
|
137
|
+
}
|
|
138
|
+
if (
|
|
139
|
+
textTag.parent.attribs.style.includes("font-weight:700")
|
|
140
|
+
) {
|
|
141
|
+
styledTag = "<b>" + styledTag + "</b>";
|
|
142
|
+
}
|
|
136
143
|
}
|
|
144
|
+
return styledTag;
|
|
145
|
+
},
|
|
146
|
+
span: function (spanTag) {
|
|
147
|
+
//we rerun span tags thru _base to catch any nested tags
|
|
148
|
+
//eventually, we're trying to get to a text tag
|
|
149
|
+
return tagHandlers._base(spanTag);
|
|
150
|
+
},
|
|
151
|
+
p: function (pTag) {
|
|
152
|
+
//we rerun p tags thru _base to catch any nested tags
|
|
153
|
+
//eventually, we're trying to get to a text tag
|
|
154
|
+
return tagHandlers._base(pTag) + "\n";
|
|
155
|
+
},
|
|
156
|
+
a: function (aTag) {
|
|
157
|
+
var href = aTag.attribs.href;
|
|
158
|
+
if (href === undefined) return "";
|
|
159
|
+
|
|
160
|
+
// extract real URLs from Google's tracking
|
|
161
|
+
// from: http://www.google.com/url?q=http%3A%2F%2Fwww.sfchronicle.com...
|
|
162
|
+
// to: http://www.sfchronicle.com...
|
|
137
163
|
if (
|
|
138
|
-
|
|
164
|
+
aTag.attribs.href &&
|
|
165
|
+
url.parse(aTag.attribs.href, true).query &&
|
|
166
|
+
url.parse(aTag.attribs.href, true).query.q
|
|
139
167
|
) {
|
|
140
|
-
|
|
168
|
+
href = url.parse(aTag.attribs.href, true).query.q;
|
|
141
169
|
}
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
aTag
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
// console.log(aTag)
|
|
171
|
-
|
|
172
|
-
//ok if there is a bold/italics with a link, we need to do something special
|
|
173
|
-
//because that info is with the aTag parent and not registered in the parents
|
|
174
|
-
//of the children of the aTag.
|
|
175
|
-
//console.log((aTag.children).length)
|
|
176
|
-
//looks like an aTag only ever has 1 child so we can probably send it right to
|
|
177
|
-
//text... but we probably need to reconstruct the element
|
|
178
|
-
//so text parser expects this syntax:
|
|
179
|
-
// { type: 'text',
|
|
180
|
-
// data: 'this is the text',
|
|
181
|
-
// parent: {
|
|
182
|
-
// attribs: {
|
|
183
|
-
// style: '-webkit-text-decoration-skip:none;color:#1155cc;font-weight:700;text-decoration:underline;text-decoration-skip-ink:none;font-style:italic'
|
|
184
|
-
// }
|
|
185
|
-
// }
|
|
186
|
-
// }
|
|
187
|
-
//so we need to reconstruct the aTag object to include the style info
|
|
188
|
-
//and then send it to the text parser
|
|
189
|
-
|
|
190
|
-
aTag = {
|
|
191
|
-
type: "text",
|
|
192
|
-
data: aTag.children[0].data,
|
|
193
|
-
parent: {
|
|
194
|
-
attribs: {
|
|
195
|
-
style: aTag.parent.attribs.style,
|
|
196
|
-
},
|
|
197
|
-
},
|
|
198
|
-
};
|
|
199
|
-
|
|
200
|
-
//if the parent of the aTag has a style attribute, we need to pass that along
|
|
201
|
-
if (
|
|
202
|
-
aTag.parent.attribs.style &&
|
|
203
|
-
typeof aTag.parent.attribs.style === "string" &&
|
|
204
|
-
(aTag.parent.attribs.style.includes("font-style:italic") ||
|
|
205
|
-
aTag.parent.attribs.style.includes("font-weight:700"))
|
|
206
|
-
) {
|
|
207
|
-
aTag["parent"] = {
|
|
208
|
-
attribs: {
|
|
209
|
-
style: aTag.parent.attribs.style,
|
|
170
|
+
// console.log('LOG 5: printing an a tag ***********************')
|
|
171
|
+
// console.log(aTag)
|
|
172
|
+
|
|
173
|
+
//ok if there is a bold/italics with a link, we need to do something special
|
|
174
|
+
//because that info is with the aTag parent and not registered in the parents
|
|
175
|
+
//of the children of the aTag.
|
|
176
|
+
//console.log((aTag.children).length)
|
|
177
|
+
//looks like an aTag only ever has 1 child so we can probably send it right to
|
|
178
|
+
//text... but we probably need to reconstruct the element
|
|
179
|
+
//so text parser expects this syntax:
|
|
180
|
+
// { type: 'text',
|
|
181
|
+
// data: 'this is the text',
|
|
182
|
+
// parent: {
|
|
183
|
+
// attribs: {
|
|
184
|
+
// style: '-webkit-text-decoration-skip:none;color:#1155cc;font-weight:700;text-decoration:underline;text-decoration-skip-ink:none;font-style:italic'
|
|
185
|
+
// }
|
|
186
|
+
// }
|
|
187
|
+
// }
|
|
188
|
+
//so we need to reconstruct the aTag object to include the style info
|
|
189
|
+
//and then send it to the text parser
|
|
190
|
+
|
|
191
|
+
aTag = {
|
|
192
|
+
type: "text",
|
|
193
|
+
data: aTag.children[0].data,
|
|
194
|
+
parent: {
|
|
195
|
+
attribs: {
|
|
196
|
+
style: aTag.parent.attribs.style,
|
|
197
|
+
},
|
|
210
198
|
},
|
|
211
199
|
};
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
var str = '<a target="_blank" href="' + href + '">';
|
|
215
|
-
str += tagHandlers.text(aTag);
|
|
216
|
-
str += "</a>";
|
|
217
|
-
return str;
|
|
218
|
-
},
|
|
219
|
-
li: function (tag) {
|
|
220
|
-
return "* " + tagHandlers._base(tag) + "\n";
|
|
221
|
-
},
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
//special cases for lists
|
|
225
|
-
["ul", "ol"].forEach(function (tag) {
|
|
226
|
-
tagHandlers[tag] = tagHandlers.span;
|
|
227
|
-
});
|
|
228
|
-
|
|
229
|
-
//and headers
|
|
230
|
-
["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
|
|
231
|
-
tagHandlers[tag] = tagHandlers.p;
|
|
232
|
-
});
|
|
233
|
-
|
|
234
|
-
//dom is something that the htmlparser2 produces from our docHtml.data
|
|
235
|
-
//let's look at it
|
|
236
|
-
// console.log('LOG 2: printing the dom ***********************')
|
|
237
|
-
// console.log(dom)
|
|
238
|
-
var body = dom[0].children[1];
|
|
239
|
-
|
|
240
|
-
//all of our content is nested in dom[0].children[1] object
|
|
241
|
-
//let's look at it
|
|
242
|
-
// console.log('LOG 3: printing the body ***********************')
|
|
243
|
-
// console.log(body)
|
|
244
|
-
|
|
245
|
-
//now let's jump back to the tagHandlers object
|
|
246
|
-
var parsedText = tagHandlers._base(body);
|
|
247
200
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
201
|
+
//if the parent of the aTag has a style attribute, we need to pass that along
|
|
202
|
+
if (
|
|
203
|
+
aTag.parent.attribs.style &&
|
|
204
|
+
typeof aTag.parent.attribs.style === "string" &&
|
|
205
|
+
(aTag.parent.attribs.style.includes("font-style:italic") ||
|
|
206
|
+
aTag.parent.attribs.style.includes("font-weight:700"))
|
|
207
|
+
) {
|
|
208
|
+
aTag["parent"] = {
|
|
209
|
+
attribs: {
|
|
210
|
+
style: aTag.parent.attribs.style,
|
|
211
|
+
},
|
|
212
|
+
};
|
|
213
|
+
}
|
|
251
214
|
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
215
|
+
var str = '<a target="_blank" href="' + href + '">';
|
|
216
|
+
str += tagHandlers.text(aTag);
|
|
217
|
+
str += "</a>";
|
|
218
|
+
return str;
|
|
219
|
+
},
|
|
220
|
+
li: function (tag) {
|
|
221
|
+
return "* " + tagHandlers._base(tag) + "\n";
|
|
222
|
+
},
|
|
223
|
+
};
|
|
224
|
+
|
|
225
|
+
//special cases for lists
|
|
226
|
+
["ul", "ol"].forEach(function (tag) {
|
|
227
|
+
tagHandlers[tag] = tagHandlers.span;
|
|
228
|
+
});
|
|
229
|
+
|
|
230
|
+
//and headers
|
|
231
|
+
["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
|
|
232
|
+
tagHandlers[tag] = tagHandlers.p;
|
|
233
|
+
});
|
|
234
|
+
|
|
235
|
+
//dom is something that the htmlparser2 produces from our docHtml.data
|
|
236
|
+
//let's look at it
|
|
237
|
+
// console.log('LOG 2: printing the dom ***********************')
|
|
238
|
+
// console.log(dom)
|
|
239
|
+
var body = dom[0].children[1];
|
|
240
|
+
|
|
241
|
+
//all of our content is nested in dom[0].children[1] object
|
|
242
|
+
//let's look at it
|
|
243
|
+
// console.log('LOG 3: printing the body ***********************')
|
|
244
|
+
// console.log(body)
|
|
245
|
+
|
|
246
|
+
//now let's jump back to the tagHandlers object
|
|
247
|
+
var parsedText = tagHandlers._base(body);
|
|
248
|
+
|
|
249
|
+
// Convert html entities into the characters as they exist in the google doc
|
|
250
|
+
var entities = new Entities();
|
|
251
|
+
parsedText = entities.decode(parsedText);
|
|
252
|
+
|
|
253
|
+
// Remove smart quotes from inside tags
|
|
254
|
+
parsedText = parsedText.replace(/<[^<>]*>/g, function (match) {
|
|
255
|
+
return match.replace(/”|“/g, '"').replace(/‘|’/g, "'");
|
|
256
|
+
});
|
|
257
|
+
|
|
258
|
+
// Parse with Archie
|
|
259
|
+
var parsed = archieml.load(parsedText);
|
|
260
|
+
|
|
261
|
+
// Create the file
|
|
262
|
+
writeFile(file_path, JSON.stringify(parsed, null, 2));
|
|
255
263
|
});
|
|
256
264
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
//
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
parser.write(docHtml.data);
|
|
273
|
-
parser.done();
|
|
274
|
-
console.log("\x1b[32m", file_path + " created successfully");
|
|
275
|
-
// Exit the promise
|
|
276
|
-
resolve(true);
|
|
277
|
-
});
|
|
265
|
+
var parser = new htmlparser.Parser(handler);
|
|
266
|
+
|
|
267
|
+
//This is what the google doc html looks like
|
|
268
|
+
//A lot of crappy tags we don't want.
|
|
269
|
+
// console.log('LOG 1: printing the docHtml ***********************')
|
|
270
|
+
// console.log(docHtml)
|
|
271
|
+
|
|
272
|
+
//now we parse the docHtml.data with our parser!
|
|
273
|
+
parser.write(docHtml.data);
|
|
274
|
+
parser.done();
|
|
275
|
+
console.log("\x1b[32m", file_path + " created successfully");
|
|
276
|
+
// Exit the promise
|
|
277
|
+
resolve(true);
|
|
278
|
+
}
|
|
279
|
+
);
|
|
278
280
|
}
|
|
279
281
|
);
|
|
280
282
|
});
|
package/copy/sheets.js
CHANGED
|
@@ -121,9 +121,11 @@ let getSheet = async (
|
|
|
121
121
|
auth,
|
|
122
122
|
spreadsheetId,
|
|
123
123
|
})
|
|
124
|
-
.catch(() => {
|
|
125
|
-
//
|
|
126
|
-
|
|
124
|
+
.catch((err) => {
|
|
125
|
+
// Maybe the service account doesn't have permissions -- try with normal token
|
|
126
|
+
console.error("Error getting file metadata for fileId:", fileId);
|
|
127
|
+
console.error("Full error:", err);
|
|
128
|
+
reject(err);
|
|
127
129
|
});
|
|
128
130
|
if (!output) {
|
|
129
131
|
return;
|