sfc-utils 1.4.109 → 1.4.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/copy/docs.js +177 -76
  2. package/package.json +1 -1
package/copy/docs.js CHANGED
@@ -51,7 +51,13 @@ var camelCase = function (str) {
51
51
  * https://console.developers.google.com/apis/api/drive.googleapis.com/quotas?project=<project>
52
52
  * where <project> is the project you authenticated with using `grunt google-auth`
53
53
  */
54
- let grabDocs = (auth, config, directory = null, filenames = null, useID = null) => {
54
+ let grabDocs = (
55
+ auth,
56
+ config,
57
+ directory = null,
58
+ filenames = null,
59
+ useID = null
60
+ ) => {
55
61
  return new Promise((resolve, reject) => {
56
62
  var drive = google.drive({
57
63
  auth,
@@ -90,87 +96,182 @@ let grabDocs = (auth, config, directory = null, filenames = null, useID = null)
90
96
  //build file path
91
97
  var file_path = `${directory}${filename}.json`;
92
98
 
93
- drive.files.export(
94
- { fileId: fileId, mimeType: "text/html" },
95
- function (err, docHtml) {
96
- var handler = new htmlparser.DomHandler(function (error, dom) {
97
- var tagHandlers = {
98
- _base: function (tag) {
99
- var str = "";
100
- tag.children.forEach(function (child) {
101
- if ((func = tagHandlers[child.name || child.type]))
102
- str += func(child);
103
- });
104
- return str;
105
- },
106
- text: function (textTag) {
107
- return textTag.data;
108
- },
109
- span: function (spanTag) {
110
- return tagHandlers._base(spanTag);
111
- },
112
- p: function (pTag) {
113
- return tagHandlers._base(pTag) + "\n";
114
- },
115
- a: function (aTag) {
116
- var href = aTag.attribs.href;
117
- if (href === undefined) return "";
118
-
119
- // extract real URLs from Google's tracking
120
- // from: http://www.google.com/url?q=http%3A%2F%2Fwww.sfchronicle.com...
121
- // to: http://www.sfchronicle.com...
99
+ drive.files.export({ fileId: fileId, mimeType: "text/html" }, function (
100
+ err,
101
+ docHtml
102
+ ) {
103
+ //Here's the parser
104
+ var handler = new htmlparser.DomHandler(function (error, dom) {
105
+ //First, we establish the tag handlers object that will parse
106
+ //different tags in different ways
107
+ //zoom down below the tagHandlers object...
108
+ var tagHandlers = {
109
+ //the whole body of the doc comes thru here and is passed thru the _base key
110
+ _base: function (tag) {
111
+ var str = "";
112
+ //for each tag in the body...
113
+ tag.children.forEach(function (child) {
114
+ //...if the tag is in the tagHandlers object, run the function
115
+ //we do this because there are lots of tags that we don't want to parse
116
+ if ((func = tagHandlers[child.name || child.type]))
117
+ // console.log('LOG 4: printing the child ***********************')
118
+ // console.log(child)
119
+
120
+ str += func(child);
121
+ // console.log("start of a string ***********************")
122
+ // console.log(str)
123
+ });
124
+ return str;
125
+ },
126
+ text: function (textTag) {
127
+ // console.log('LOG 6: and finally a text tag ***********************')
128
+ // console.log(textTag)
129
+
130
+ var styledTag = textTag.data;
131
+ if (textTag.parent.attribs.style !== undefined) {
132
+ if (
133
+ textTag.parent.attribs.style.includes("font-style:italic")
134
+ ) {
135
+ styledTag = "<i>" + styledTag + "</i>";
136
+ }
122
137
  if (
123
- aTag.attribs.href &&
124
- url.parse(aTag.attribs.href, true).query &&
125
- url.parse(aTag.attribs.href, true).query.q
138
+ textTag.parent.attribs.style.includes("font-weight:700")
126
139
  ) {
127
- href = url.parse(aTag.attribs.href, true).query.q;
140
+ styledTag = "<b>" + styledTag + "</b>";
128
141
  }
142
+ }
143
+ return styledTag;
144
+ },
145
+ span: function (spanTag) {
146
+ //we rerun span tags thru _base to catch any nested tags
147
+ //eventually, we're trying to get to a text tag
148
+ return tagHandlers._base(spanTag);
149
+ },
150
+ p: function (pTag) {
151
+ //we rerun p tags thru _base to catch any nested tags
152
+ //eventually, we're trying to get to a text tag
153
+ return tagHandlers._base(pTag) + "\n";
154
+ },
155
+ a: function (aTag) {
156
+ var href = aTag.attribs.href;
157
+ if (href === undefined) return "";
158
+
159
+ // extract real URLs from Google's tracking
160
+ // from: http://www.google.com/url?q=http%3A%2F%2Fwww.sfchronicle.com...
161
+ // to: http://www.sfchronicle.com...
162
+ if (
163
+ aTag.attribs.href &&
164
+ url.parse(aTag.attribs.href, true).query &&
165
+ url.parse(aTag.attribs.href, true).query.q
166
+ ) {
167
+ href = url.parse(aTag.attribs.href, true).query.q;
168
+ }
169
+ // console.log('LOG 5: printing an a tag ***********************')
170
+ // console.log(aTag)
171
+
172
+ //ok if there is a bold/italics with a link, we need to do something special
173
+ //because that info is with the aTag parent and not registered in the parents
174
+ //of the children of the aTag.
175
+ //console.log((aTag.children).length)
176
+ //looks like an aTag only ever has 1 child so we can probably send it right to
177
+ //text... but we probably need to reconstruct the element
178
+ //so text parser expects this syntax:
179
+ // { type: 'text',
180
+ // data: 'this is the text',
181
+ // parent: {
182
+ // attribs: {
183
+ // style: '-webkit-text-decoration-skip:none;color:#1155cc;font-weight:700;text-decoration:underline;text-decoration-skip-ink:none;font-style:italic'
184
+ // }
185
+ // }
186
+ // }
187
+ //so we need to reconstruct the aTag object to include the style info
188
+ //and then send it to the text parser
189
+
190
+ aTag = {
191
+ type: "text",
192
+ data: aTag.children[0].data,
193
+ parent: {
194
+ attribs: {
195
+ style: aTag.parent.attribs.style,
196
+ },
197
+ },
198
+ };
199
+
200
+ if (
201
+ aTag.parent.attribs.style.includes("font-style:italic") ||
202
+ aTag.parent.attribs.style.includes("font-weight:700")
203
+ ) {
204
+ aTag["parent"] = {
205
+ attribs: {
206
+ style: aTag.parent.attribs.style,
207
+ },
208
+ };
209
+ }
210
+
211
+ var str = '<a target="_blank" href="' + href + '">';
212
+ str += tagHandlers.text(aTag);
213
+ str += "</a>";
214
+ return str;
215
+ },
216
+ li: function (tag) {
217
+ return "* " + tagHandlers._base(tag) + "\n";
218
+ },
219
+ };
220
+
221
+ //special cases for lists
222
+ ["ul", "ol"].forEach(function (tag) {
223
+ tagHandlers[tag] = tagHandlers.span;
224
+ });
129
225
 
130
- var str = '<a target="_blank" href="' + href + '">';
131
- str += tagHandlers._base(aTag);
132
- str += "</a>";
133
- return str;
134
- },
135
- li: function (tag) {
136
- return "* " + tagHandlers._base(tag) + "\n";
137
- },
138
- };
139
-
140
- ["ul", "ol"].forEach(function (tag) {
141
- tagHandlers[tag] = tagHandlers.span;
142
- });
143
- ["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
144
- tagHandlers[tag] = tagHandlers.p;
145
- });
146
-
147
- var body = dom[0].children[1];
148
- var parsedText = tagHandlers._base(body);
149
-
150
- // Convert html entities into the characters as they exist in the google doc
151
- var entities = new Entities();
152
- parsedText = entities.decode(parsedText);
153
-
154
- // Remove smart quotes from inside tags
155
- parsedText = parsedText.replace(/<[^<>]*>/g, function (match) {
156
- return match.replace(/”|“/g, '"').replace(/‘|’/g, "'");
157
- });
158
-
159
- // Parse with Archie
160
- var parsed = archieml.load(parsedText);
161
-
162
- // Create the file
163
- writeFile(file_path, JSON.stringify(parsed, null, 2));
226
+ //and headers
227
+ ["h1", "h2", "h3", "h4", "h5", "h6"].forEach(function (tag) {
228
+ tagHandlers[tag] = tagHandlers.p;
164
229
  });
165
230
 
166
- var parser = new htmlparser.Parser(handler);
167
- parser.write(docHtml.data);
168
- parser.done();
169
- console.log("\x1b[32m", file_path + " created successfully");
170
- // Exit the promise
171
- resolve(true);
172
- }
173
- );
231
+ //dom is something that the htmlparser2 produces from our docHtml.data
232
+ //let's look at it
233
+ // console.log('LOG 2: printing the dom ***********************')
234
+ // console.log(dom)
235
+ var body = dom[0].children[1];
236
+
237
+ //all of our content is nested in dom[0].children[1] object
238
+ //let's look at it
239
+ // console.log('LOG 3: printing the body ***********************')
240
+ // console.log(body)
241
+
242
+ //now let's jump back to the tagHandlers object
243
+ var parsedText = tagHandlers._base(body);
244
+
245
+ // Convert html entities into the characters as they exist in the google doc
246
+ var entities = new Entities();
247
+ parsedText = entities.decode(parsedText);
248
+
249
+ // Remove smart quotes from inside tags
250
+ parsedText = parsedText.replace(/<[^<>]*>/g, function (match) {
251
+ return match.replace(/”|“/g, '"').replace(/‘|’/g, "'");
252
+ });
253
+
254
+ // Parse with Archie
255
+ var parsed = archieml.load(parsedText);
256
+
257
+ // Create the file
258
+ writeFile(file_path, JSON.stringify(parsed, null, 2));
259
+ });
260
+
261
+ var parser = new htmlparser.Parser(handler);
262
+
263
+ //This is what the google doc html looks like
264
+ //A lot of crappy tags we don't want.
265
+ // console.log('LOG 1: printing the docHtml ***********************')
266
+ // console.log(docHtml)
267
+
268
+ //now we parse the docHtml.data with our parser!
269
+ parser.write(docHtml.data);
270
+ parser.done();
271
+ console.log("\x1b[32m", file_path + " created successfully");
272
+ // Exit the promise
273
+ resolve(true);
274
+ });
174
275
  }
175
276
  );
176
277
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "sfc-utils",
3
- "version": "1.4.109",
3
+ "version": "1.4.110",
4
4
  "author": "ewagstaff <evanjwagstaff@gmail.com>",
5
5
  "dependencies": {
6
6
  "archieml": "^0.4.2",