scraply 1.0.12 → 1.0.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "scraply",
3
3
  "description": "A simple, configurable and functional content scraper",
4
- "version": "1.0.12",
4
+ "version": "1.0.13",
5
5
  "main": "src/scraply.js",
6
6
  "type": "module",
7
7
  "scripts": {
@@ -9,17 +9,22 @@ export const formatData = (entry) => {
9
9
  const isExcluded = CONFIG.DATA_FORMATTER.EXCLUDED_PATTERNS.some(pattern => new RegExp(pattern).test(entry.url));
10
10
 
11
11
  if (!isExcluded) {
12
- // Check for the specific category path
13
- const pathSegments = pathname.split('/');
14
- let categorisedPath = CONFIG.DATA_FORMATTER.CATEGORISED_PATHS[url.origin]?.[pathSegments[1]];
15
-
16
- // If no specific category path is found, use the "*" fallback
17
- if (!categorisedPath) {
18
- categorisedPath = CONFIG.DATA_FORMATTER.CATEGORISED_PATHS[url.origin]?.['*'];
19
- }
20
-
21
- if (categorisedPath) {
22
- return path.join(CONFIG.DATA_FORMATTER.FORMATTED_PATH, categorisedPath); // Return the path where the data should be saved.
12
+ const pathSegments = pathname.split('/').filter(Boolean); // filter out empty segments
13
+ const categorisedPaths = CONFIG.DATA_FORMATTER.CATEGORISED_PATHS[url.origin];
14
+
15
+ if (categorisedPaths) {
16
+ // Try to match the full path segments, reducing specificity step by step
17
+ let categorisedPath = null;
18
+
19
+ for (let i = pathSegments.length; i >= 1; i--) {
20
+ const pathKey = pathSegments.slice(0, i).join('/');
21
+ categorisedPath = categorisedPaths[pathKey];
22
+ if (categorisedPath) break;
23
+ }
24
+
25
+ // Fallback to wildcard match ('*') if no specific path is found
26
+ if (!categorisedPath) categorisedPath = categorisedPaths['*'];
27
+ if (categorisedPath) return path.join(CONFIG.DATA_FORMATTER.FORMATTED_PATH, categorisedPath); // Return the path where the data should be saved.
23
28
  }
24
29
  }
25
30
  } catch (e) {