sfc-utils 1.4.188 → 1.4.189
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/copy/docs.js +171 -173
- package/copy/sheets.js +3 -5
- package/package.json +1 -1
package/copy/docs.js
CHANGED
|
@@ -72,11 +72,9 @@ let grabDocs = (
|
|
|
72
72
|
.get({
|
|
73
73
|
fileId,
|
|
74
74
|
})
|
|
75
|
-
.catch((
|
|
75
|
+
.catch(() => {
|
|
76
76
|
// Maybe the service account doesn't have permissions -- try with the normal token
|
|
77
|
-
|
|
78
|
-
console.error("Full error:", err);
|
|
79
|
-
reject(err);
|
|
77
|
+
reject();
|
|
80
78
|
});
|
|
81
79
|
if (!meta) {
|
|
82
80
|
return;
|
|
@@ -98,185 +96,185 @@ let grabDocs = (
|
|
|
98
96
|
//build file path
|
|
99
97
|
var file_path = `${directory}${filename}.json`;
|
|
100
98
|
|
|
101
|
-
drive.files.export(
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
)
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
if (
|
|
139
|
-
textTag.parent.attribs.style.includes("font-weight:700")
|
|
140
|
-
) {
|
|
141
|
-
styledTag = "<b>" + styledTag + "</b>";
|
|
142
|
-
}
|
|
99
|
+
drive.files.export({ fileId: fileId, mimeType: "text/html" }, function (
|
|
100
|
+
err,
|
|
101
|
+
docHtml
|
|
102
|
+
) {
|
|
103
|
+
//Here's the parser
|
|
104
|
+
var handler = new htmlparser.DomHandler(function (error, dom) {
|
|
105
|
+
//First, we establish the tag handlers object that will parse
|
|
106
|
+
//different tags in different ways
|
|
107
|
+
//zoom down below the tagHandlers object...
|
|
108
|
+
var tagHandlers = {
|
|
109
|
+
//the whole body of the doc comes thru here and is passed thru the _base key
|
|
110
|
+
_base: function (tag) {
|
|
111
|
+
var str = "";
|
|
112
|
+
//for each tag in the body...
|
|
113
|
+
tag.children.forEach(function (child) {
|
|
114
|
+
//...if the tag is in the tagHandlers object, run the function
|
|
115
|
+
//we do this because there are lots of tags that we don't want to parse
|
|
116
|
+
if ((func = tagHandlers[child.name || child.type]))
|
|
117
|
+
// console.log('LOG 4: printing the child ***********************')
|
|
118
|
+
// console.log(child)
|
|
119
|
+
|
|
120
|
+
str += func(child);
|
|
121
|
+
// console.log("start of a string ***********************")
|
|
122
|
+
// console.log(str)
|
|
123
|
+
});
|
|
124
|
+
return str;
|
|
125
|
+
},
|
|
126
|
+
text: function (textTag) {
|
|
127
|
+
// console.log('LOG 6: and finally a text tag ***********************')
|
|
128
|
+
// console.log(textTag)
|
|
129
|
+
|
|
130
|
+
var styledTag = textTag.data;
|
|
131
|
+
if (textTag.parent.attribs.style !== undefined) {
|
|
132
|
+
if (
|
|
133
|
+
textTag.parent.attribs.style.includes("font-style:italic")
|
|
134
|
+
) {
|
|
135
|
+
styledTag = "<i>" + styledTag + "</i>";
|
|
143
136
|
}
|
|
144
|
-
return styledTag;
|
|
145
|
-
},
|
|
146
|
-
span: function (spanTag) {
|
|
147
|
-
//we rerun span tags thru _base to catch any nested tags
|
|
148
|
-
//eventually, we're trying to get to a text tag
|
|
149
|
-
return tagHandlers._base(spanTag);
|
|
150
|
-
},
|
|
151
|
-
p: function (pTag) {
|
|
152
|
-
//we rerun p tags thru _base to catch any nested tags
|
|
153
|
-
//eventually, we're trying to get to a text tag
|
|
154
|
-
return tagHandlers._base(pTag) + "\n";
|
|
155
|
-
},
|
|
156
|
-
a: function (aTag) {
|
|
157
|
-
var href = aTag.attribs.href;
|
|
158
|
-
if (href === undefined) return "";
|
|
159
|
-
|
|
160
|
-
// extract real URLs from Google's tracking
|
|
161
|
-
// from: http://www.google.com/url?q=http%3A%2F%2Fwww.sfchronicle.com...
|
|
162
|
-
// to: http://www.sfchronicle.com...
|
|
163
137
|
if (
|
|
164
|
-
|
|
165
|
-
url.parse(aTag.attribs.href, true).query &&
|
|
166
|
-
url.parse(aTag.attribs.href, true).query.q
|
|
138
|
+
textTag.parent.attribs.style.includes("font-weight:700")
|
|
167
139
|
) {
|
|
168
|
-
|
|
140
|
+
styledTag = "<b>" + styledTag + "</b>";
|
|
169
141
|
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
aTag
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
142
|
+
}
|
|
143
|
+
return styledTag;
|
|
144
|
+
},
|
|
145
|
+
span: function (spanTag) {
|
|
146
|
+
//we rerun span tags thru _base to catch any nested tags
|
|
147
|
+
//eventually, we're trying to get to a text tag
|
|
148
|
+
return tagHandlers._base(spanTag);
|
|
149
|
+
},
|
|
150
|
+
p: function (pTag) {
|
|
151
|
+
//we rerun p tags thru _base to catch any nested tags
|
|
152
|
+
//eventually, we're trying to get to a text tag
|
|
153
|
+
return tagHandlers._base(pTag) + "\n";
|
|
154
|
+
},
|
|
155
|
+
a: function (aTag) {
|
|
156
|
+
var href = aTag.attribs.href;
|
|
157
|
+
if (href === undefined) return "";
|
|
158
|
+
|
|
159
|
+
// extract real URLs from Google's tracking
|
|
160
|
+
// from: http://www.google.com/url?q=http%3A%2F%2Fwww.sfchronicle.com...
|
|
161
|
+
// to: http://www.sfchronicle.com...
|
|
162
|
+
if (
|
|
163
|
+
aTag.attribs.href &&
|
|
164
|
+
url.parse(aTag.attribs.href, true).query &&
|
|
165
|
+
url.parse(aTag.attribs.href, true).query.q
|
|
166
|
+
) {
|
|
167
|
+
href = url.parse(aTag.attribs.href, true).query.q;
|
|
168
|
+
}
|
|
169
|
+
// console.log('LOG 5: printing an a tag ***********************')
|
|
170
|
+
// console.log(aTag)
|
|
171
|
+
|
|
172
|
+
//ok if there is a bold/italics with a link, we need to do something special
|
|
173
|
+
//because that info is with the aTag parent and not registered in the parents
|
|
174
|
+
//of the children of the aTag.
|
|
175
|
+
//console.log((aTag.children).length)
|
|
176
|
+
//looks like an aTag only ever has 1 child so we can probably send it right to
|
|
177
|
+
//text... but we probably need to reconstruct the element
|
|
178
|
+
//so text parser expects this syntax:
|
|
179
|
+
// { type: 'text',
|
|
180
|
+
// data: 'this is the text',
|
|
181
|
+
// parent: {
|
|
182
|
+
// attribs: {
|
|
183
|
+
// style: '-webkit-text-decoration-skip:none;color:#1155cc;font-weight:700;text-decoration:underline;text-decoration-skip-ink:none;font-style:italic'
|
|
184
|
+
// }
|
|
185
|
+
// }
|
|
186
|
+
// }
|
|
187
|
+
//so we need to reconstruct the aTag object to include the style info
|
|
188
|
+
//and then send it to the text parser
|
|
189
|
+
|
|
190
|
+
aTag = {
|
|
191
|
+
type: "text",
|
|
192
|
+
data: aTag.children[0].data,
|
|
193
|
+
parent: {
|
|
194
|
+
attribs: {
|
|
195
|
+
style: aTag.parent.attribs.style,
|
|
196
|
+
},
|
|
197
|
+
},
|
|
198
|
+
};
|
|
199
|
+
|
|
200
|
+
//if the parent of the aTag has a style attribute, we need to pass that along
|
|
201
|
+
if (
|
|
202
|
+
aTag.parent.attribs.style &&
|
|
203
|
+
typeof aTag.parent.attribs.style === "string" &&
|
|
204
|
+
(aTag.parent.attribs.style.includes("font-style:italic") ||
|
|
205
|
+
aTag.parent.attribs.style.includes("font-weight:700"))
|
|
206
|
+
) {
|
|
207
|
+
aTag["parent"] = {
|
|
208
|
+
attribs: {
|
|
209
|
+
style: aTag.parent.attribs.style,
|
|
198
210
|
},
|
|
199
211
|
};
|
|
212
|
+
}
|
|
213
|
+
|
|
214
|
+
var str = '<a target="_blank" href="' + href + '">';
|
|
215
|
+
str += tagHandlers.text(aTag);
|
|
216
|
+
str += "</a>";
|
|
217
|
+
return str;
|
|
218
|
+
},
|
|
219
|
+
li: function (tag) {
|
|
220
|
+
return "* " + tagHandlers._base(tag) + "\n";
|
|
221
|
+
},
|
|
222
|
+
};
|
|
223
|
+
|
|
224
|
+
//special cases for lists
|
|
225
|
+
["ul", "ol"].forEach(function (tag) {
|
|
226
|
+
tagHandlers[tag] = tagHandlers.span;
|
|
227
|
+
});
|
|
200
228
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
},
|
|
212
|
-
};
|
|
213
|
-
}
|
|
229
|
+
//and headers
|
|
230
|
+
["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
|
|
231
|
+
tagHandlers[tag] = tagHandlers.p;
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
//dom is something that the htmlparser2 produces from our docHtml.data
|
|
235
|
+
//let's look at it
|
|
236
|
+
// console.log('LOG 2: printing the dom ***********************')
|
|
237
|
+
// console.log(dom)
|
|
238
|
+
var body = dom[0].children[1];
|
|
214
239
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
//and headers
|
|
231
|
-
["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
|
|
232
|
-
tagHandlers[tag] = tagHandlers.p;
|
|
233
|
-
});
|
|
234
|
-
|
|
235
|
-
//dom is something that the htmlparser2 produces from our docHtml.data
|
|
236
|
-
//let's look at it
|
|
237
|
-
// console.log('LOG 2: printing the dom ***********************')
|
|
238
|
-
// console.log(dom)
|
|
239
|
-
var body = dom[0].children[1];
|
|
240
|
-
|
|
241
|
-
//all of our content is nested in dom[0].children[1] object
|
|
242
|
-
//let's look at it
|
|
243
|
-
// console.log('LOG 3: printing the body ***********************')
|
|
244
|
-
// console.log(body)
|
|
245
|
-
|
|
246
|
-
//now let's jump back to the tagHandlers object
|
|
247
|
-
var parsedText = tagHandlers._base(body);
|
|
248
|
-
|
|
249
|
-
// Convert html entities into the characters as they exist in the google doc
|
|
250
|
-
var entities = new Entities();
|
|
251
|
-
parsedText = entities.decode(parsedText);
|
|
252
|
-
|
|
253
|
-
// Remove smart quotes from inside tags
|
|
254
|
-
parsedText = parsedText.replace(/<[^<>]*>/g, function (match) {
|
|
255
|
-
return match.replace(/”|“/g, '"').replace(/‘|’/g, "'");
|
|
256
|
-
});
|
|
257
|
-
|
|
258
|
-
// Parse with Archie
|
|
259
|
-
var parsed = archieml.load(parsedText);
|
|
260
|
-
|
|
261
|
-
// Create the file
|
|
262
|
-
writeFile(file_path, JSON.stringify(parsed, null, 2));
|
|
240
|
+
//all of our content is nested in dom[0].children[1] object
|
|
241
|
+
//let's look at it
|
|
242
|
+
// console.log('LOG 3: printing the body ***********************')
|
|
243
|
+
// console.log(body)
|
|
244
|
+
|
|
245
|
+
//now let's jump back to the tagHandlers object
|
|
246
|
+
var parsedText = tagHandlers._base(body);
|
|
247
|
+
|
|
248
|
+
// Convert html entities into the characters as they exist in the google doc
|
|
249
|
+
var entities = new Entities();
|
|
250
|
+
parsedText = entities.decode(parsedText);
|
|
251
|
+
|
|
252
|
+
// Remove smart quotes from inside tags
|
|
253
|
+
parsedText = parsedText.replace(/<[^<>]*>/g, function (match) {
|
|
254
|
+
return match.replace(/”|“/g, '"').replace(/‘|’/g, "'");
|
|
263
255
|
});
|
|
264
256
|
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
//
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
257
|
+
// Parse with Archie
|
|
258
|
+
var parsed = archieml.load(parsedText);
|
|
259
|
+
|
|
260
|
+
// Create the file
|
|
261
|
+
writeFile(file_path, JSON.stringify(parsed, null, 2));
|
|
262
|
+
});
|
|
263
|
+
|
|
264
|
+
var parser = new htmlparser.Parser(handler);
|
|
265
|
+
|
|
266
|
+
//This is what the google doc html looks like
|
|
267
|
+
//A lot of crappy tags we don't want.
|
|
268
|
+
// console.log('LOG 1: printing the docHtml ***********************')
|
|
269
|
+
// console.log(docHtml)
|
|
270
|
+
|
|
271
|
+
//now we parse the docHtml.data with our parser!
|
|
272
|
+
parser.write(docHtml.data);
|
|
273
|
+
parser.done();
|
|
274
|
+
console.log("\x1b[32m", file_path + " created successfully");
|
|
275
|
+
// Exit the promise
|
|
276
|
+
resolve(true);
|
|
277
|
+
});
|
|
280
278
|
}
|
|
281
279
|
);
|
|
282
280
|
});
|
package/copy/sheets.js
CHANGED
|
@@ -121,11 +121,9 @@ let getSheet = async (
|
|
|
121
121
|
auth,
|
|
122
122
|
spreadsheetId,
|
|
123
123
|
})
|
|
124
|
-
.catch((
|
|
125
|
-
//
|
|
126
|
-
|
|
127
|
-
console.error("Full error:", err);
|
|
128
|
-
reject(err);
|
|
124
|
+
.catch(() => {
|
|
125
|
+
// This might fail if we don't have access
|
|
126
|
+
reject();
|
|
129
127
|
});
|
|
130
128
|
if (!output) {
|
|
131
129
|
return;
|