portadom 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/README.md +63 -1
  2. package/package.json +1 -1
package/README.md CHANGED
@@ -58,7 +58,9 @@ const btnText = await btn.text();
58
58
  npm install portadom
59
59
  ```
60
60
 
61
- ## Basic usage
61
+ ## How to use
62
+
63
+ ### Minimal example
62
64
 
63
65
  ```js
64
66
  const html = `<div>
@@ -75,6 +77,59 @@ const btnProp = await btn.href();
75
77
  // btnProp == "https://example.com#"
76
78
  ```
77
79
 
80
+ ### Full example
81
+
82
+ ```js
83
+ const $ = loadCheerio(html);
84
+ const dom = cheerioPortadom($.root(), url);
85
+ // ...
86
+ const rootEl = dom.root();
87
+ const url = await dom.url();
88
+
89
+ // Find and extract data
90
+ const entries = await rootEl.findMany('.list-row:not(.native-agent):not(.reach-list)')
91
+ .mapAsyncSerial(async (el) => {
92
+ const employerName = await el.findOne('.employer').text();
93
+ const employerUrl = await el.findOne('.offer-company-logo-link').href();
94
+ const employerLogoUrl = await el.findOne('.offer-company-logo-link img').src();
95
+
96
+ const offerUrlEl = el.findOne('h2 a');
97
+ const offerUrl = await offerUrlEl.href();
98
+ const offerName = await offerUrlEl.text();
99
+ const offerId = offerUrl?.match(/O\d{2,}/)?.[0] ?? null;
100
+
101
+ const location = await el.findOne('.job-location').text();
102
+
103
+ const salaryText = await el.findOne('.label-group > a[data-dimension7="Salary label"]').text();
104
+
105
+ const labels = await el.findMany('.label-group > a:not([data-dimension7="Salary label"])')
106
+ .mapAsyncSerial((el) => el.text())
107
+ .then((arr) => arr.filter(Boolean) as string[]);
108
+
109
+ const footerInfoEl = el.findOne('.list-footer .info');
110
+ const lastChangeRelativeTimeEl = footerInfoEl.findOne('strong');
111
+ const lastChangeRelativeTime = await lastChangeRelativeTimeEl.text();
112
+ // Remove the element so it's easier to get the text content
113
+ await lastChangeRelativeTimeEl.remove();
114
+ const lastChangeTypeText = await footerInfoEl.textAsLower();
115
+ const lastChangeType = lastChangeTypeText === 'pridané' ? 'added' : 'modified';
116
+
117
+ return {
118
+ listingUrl: url,
119
+ employerName,
120
+ employerUrl,
121
+ employerLogoUrl,
122
+ offerName,
123
+ offerUrl,
124
+ offerId,
125
+ location,
126
+ labels,
127
+ lastChangeRelativeTime,
128
+ lastChangeType,
129
+ };
130
+ });
131
+ ```
132
+
78
133
  ### Loading
79
134
 
80
135
  Here is how you can load DOM in different environments:
@@ -200,3 +255,10 @@ const attrs = await Promise.all(mapPromises);
200
255
  See the [full documentation here](./docs/typedoc/modules.md).
201
256
  - [Portadom](./docs/typedoc/interfaces/Portadom.md)
202
257
  - [Portapage](./docs/typedoc/interfaces/Portapage.md)
258
+
259
+ ## Real-life examples
260
+
261
+ - [Profesia.sk Scraper](https://github.com/JuroOravec/apify-actor-profesia-sk)
262
+ - [Example 1](https://github.com/JuroOravec/apify-actor-profesia-sk/blob/3793915632bd81dc257d36699808635c8bc3f87e/src/pageActions/jobListing.ts#L128)
263
+ - [Example 2](https://github.com/JuroOravec/apify-actor-profesia-sk/blob/3793915632bd81dc257d36699808635c8bc3f87e/src/pageActions/jobDetail.ts#L75)
264
+ - [SKCRIS Scraper](https://github.com/JuroOravec/apify-actor-skcris/blob/9ce92f9bd55ffcde91f22744e49ba97b6b4f0e44/src/pageActions/detail.ts#L510)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "portadom",
3
- "version": "1.0.2",
3
+ "version": "1.0.3",
4
4
  "private": false,
5
5
  "description": "Single DOM manipulation interface across Browser API, JSDOM, Cheerio, Playwright",
6
6
  "author": "Juro Oravec <juraj.oravec.josefson@gmail.com>",