scraply 1.0.13 → 1.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,11 +16,11 @@ jobs:
16
16
  - name: Set up Node.js
17
17
  uses: actions/setup-node@v3
18
18
  with:
19
- node-version: '20'
19
+ node-version: '22'
20
20
  registry-url: 'https://registry.npmjs.org/'
21
21
 
22
22
  - name: Install dependencies
23
- run: npm install
23
+ run: npm ci
24
24
 
25
25
  - name: Publish to npm
26
26
  run: npm publish
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "scraply",
3
3
  "description": "A simple, configurable and functional content scraper",
4
- "version": "1.0.13",
4
+ "version": "1.0.14",
5
5
  "main": "src/scraply.js",
6
6
  "type": "module",
7
7
  "scripts": {
package/src/scraply.js CHANGED
@@ -2,8 +2,7 @@ import { loadConfig } from './loadConfig.js';
2
2
  import { normalizeURL } from './utils/crawl/url/normalize.js';
3
3
  import { loadJSON, saveQueue, deleteDataFiles, deleteUntrackedFiles } from './utils/crawl/fileOperations.js';
4
4
  import { processURL } from './utils/crawl/url/processor.js';
5
- import { formatData, saveSortedFormattedJSON, saveHardcodedExtraLinks } from './utils/format/formatData.js';
6
- import path from 'node:path';
5
+ import { formatData, saveSortedFormattedJSON } from './utils/format/formatData.js';
7
6
 
8
7
  let urlData = [];
9
8
  let urlMetadata = {};
@@ -104,16 +103,6 @@ const start = async () => {
104
103
  };
105
104
  console.log(`${totalSavedURLs} total saved URLs to ${CONFIG.DATA_FORMATTER.FORMATTED_PATH}`);
106
105
 
107
- // Save hardcoded extra links to files.
108
- const totalHardcodedLinks = await saveHardcodedExtraLinks();
109
- console.log(`${totalHardcodedLinks} Hardcoded extra links saved to ${CONFIG.DATA_FORMATTER.FORMATTED_PATH}`);
110
-
111
- // Track the files generated for hardcoded links with full paths.
112
- CONFIG.DATA_FORMATTER.HARD_CODED_LINKS.forEach(link => {
113
- const hardcodedFilePath = path.join(CONFIG.DATA_FORMATTER.FORMATTED_PATH, link.file_name);
114
- generatedFiles.add(hardcodedFilePath); // Add the full path to the set
115
- });
116
-
117
106
  // Error reporting: Save into CONFIG.DATA_FORMATTER.ERROR_REPORT_PATH the URLs that had any error: Save the url, the referrer, status code and error!
118
107
  const errorData = errorUrls.map(entry => {
119
108
  return { url: entry.url, status: entry.status, error: entry.error };
@@ -50,14 +50,3 @@ export const saveSortedFormattedJSON = (filePath, data) => {
50
50
  const sortedData = sortData(data, 'url'); // ensure data is sorted before saving
51
51
  return fs.writeFileSync(filePath, JSON.stringify(sortedData, null, 2), 'utf8');
52
52
  };
53
-
54
- export const saveHardcodedExtraLinks = async () => {
55
- const hardcodedLinks = CONFIG.DATA_FORMATTER.HARD_CODED_LINKS;
56
-
57
- for (const link of hardcodedLinks) {
58
- const filePath = path.join(CONFIG.DATA_FORMATTER.FORMATTED_PATH, link.file_name);
59
- saveSortedFormattedJSON(filePath, link.data);
60
- }
61
-
62
- return hardcodedLinks.reduce((acc, link) => acc + link.data.length, 0); // Total number of links saved
63
- };