npm - @socialgouv/cdtn-elasticsearch - Versions diffs - 2.45.0 → 2.46.1 - Mend

@socialgouv/cdtn-elasticsearch 2.45.0 → 2.46.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +10 -0
package/package.json +2 -5
package/src/index.d.ts +0 -2
package/src/mapping/document.mapping.js +1 -4
package/src/vectorizer/index.js +1 -56
package/src/vectorizer/index.test.js +1 -41

package/CHANGELOG.md CHANGED Viewed

@@ -3,6 +3,16 @@
 All notable changes to this project will be documented in this file.
 See [Conventional Commits](https://conventionalcommits.org) for commit guidelines.
+## [2.46.1](https://github.com/SocialGouv/cdtn-admin/compare/v2.46.0...v2.46.1) (2024-06-27)
+**Note:** Version bump only for package @socialgouv/cdtn-elasticsearch
+# [2.46.0](https://github.com/SocialGouv/cdtn-admin/compare/v2.45.0...v2.46.0) (2024-06-24)
+### Features
+- **search:** remove usage of NLP to search document ([#1426](https://github.com/SocialGouv/cdtn-admin/issues/1426)) ([4f76e37](https://github.com/SocialGouv/cdtn-admin/commit/4f76e37d9d3ac34a70f041aa5946b72e8cc3fd36))
 # [2.45.0](https://github.com/SocialGouv/cdtn-admin/compare/v2.44.2...v2.45.0) (2024-06-18)
 **Note:** Version bump only for package @socialgouv/cdtn-elasticsearch

package/package.json CHANGED Viewed

@@ -1,15 +1,12 @@
 {
   "name": "@socialgouv/cdtn-elasticsearch",
   "description": "SocialGouv - Code du travail numerique - Infrastructure - Elasticsearch",
-  "version": "2.45.0",
+  "version": "2.46.1",
   "babel": {
     "plugins": [
       "@babel/plugin-transform-modules-commonjs"
     ]
   },
-  "dependencies": {
-    "got": "^11.8.2"
-  },
   "license": "Apache-2.0",
   "main": "src/index.js",
   "publishConfig": {
@@ -40,5 +37,5 @@
   },
   "sideEffects": false,
   "types": "src/index.d.ts",
-  "gitHead": "2f54b56bcc2c2af0e692e0c5674eb9bfae11300a"
+  "gitHead": "470a848cc73f632ae894c78b2c369fce61fd42c2"
 }

package/src/index.d.ts CHANGED Viewed

@@ -4,7 +4,5 @@ export const documentMapping: any;
 export const DOCUMENTS: string;
 export const indexDocumentsBatched: any;
 export const SUGGESTIONS: string;
-export const vectorizeDocument: any;
 export const version: any;
 export const suggestionMapping: any;
-export const vectorizeQuery: any;

package/src/mapping/document.mapping.js CHANGED Viewed

@@ -236,14 +236,11 @@ exports.documentMapping = {
       type: "text",
     },
-    title_vector: {
-      dims: 512,
-      type: "dense_vector",
-    },
     // The source URL
     url: {
       type: "keyword",
     },
     // used in prequalifieds
     variants: {
       type: "text",

package/src/vectorizer/index.js CHANGED Viewed

@@ -1,14 +1,5 @@
-// vectorizer is imported by code-du-travail-api which is using CommonJS, and throwing an exception
-// when requiring code-du-travail-data ES module, thus we keep using CommonJS import here
-const got = require("got");
 const { stopwords: semantic_stopwords } = require("../dataset/stop_words");
-// URL of the TF serve deployment
-const NLP_URL =
-  process.env.NLP_URL || "https://serving-ml.fabrique.social.gouv.fr";
-console.log("NLP URL:", NLP_URL);
-const tfServeURL = NLP_URL + "/v1/models/sentqam:predict";
 function stripAccents(text) {
   // strip accents
   return text.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
@@ -16,8 +7,6 @@ function stripAccents(text) {
 const stopWords = new Set(semantic_stopwords.map(stripAccents));
-const cache = new Map();
 function preprocess(text) {
   const stripped = stripAccents(text);
@@ -31,48 +20,4 @@ function preprocess(text) {
   return noStopWords.join(" ");
 }
-async function callTFServe(json) {
-  const response = await got.post(tfServeURL, {
-    cache,
-    json,
-    responseType: "json",
-    retry: {
-      limit: 15,
-      methods: ["POST"],
-    },
-  });
-  return response.body["outputs"];
-}
-async function vectorizeDocument(title, content) {
-  if (title == undefined || title == "") {
-    throw new Error("Cannot vectorize document with empty title.");
-  }
-  const input = [preprocess(title)];
-  const context = content ? [preprocess(content)] : "";
-  const body = {
-    inputs: { context, input },
-    signature_name: "response_encoder",
-  };
-  const vectors = await callTFServe(body);
-  return vectors[0];
-}
-async function vectorizeQuery(query) {
-  if (!query) {
-    throw new Error("Cannot vectorize empty query.");
-  }
-  const inputs = [preprocess(query)];
-  const body = {
-    inputs,
-    signature_name: "question_encoder",
-  };
-  const vectors = await callTFServe(body);
-  return vectors[0];
-}
-module.exports = { preprocess, vectorizeDocument, vectorizeQuery };
+module.exports = { preprocess };

package/src/vectorizer/index.test.js CHANGED Viewed

@@ -1,44 +1,4 @@
-const { vectorizeDocument, vectorizeQuery, preprocess } = require("./index");
-const timeout = 10000;
-test(
-  "Should vectorize document",
-  async () => {
-    const vector1 = await vectorizeDocument("titre", "contenu");
-    expect(vector1).toBeDefined();
-    // FIXME Should return the same result but don't. See with remi and fabien.
-    // expect(vector1).toMatchSnapshot();
-    // preprocessing should make those embeddings equal
-    // FIXME Should return the same result but don't. See with remi and fabien.
-    // const vector2 = await vectorizeDocument("le titre", "et le contènu");
-    // expect(vector2).toEqual(vector1);
-  },
-  timeout
-);
-test(
-  "Should vectorize query",
-  async () => {
-    // FIXME Résultat aléatoire, voir pourquoi on n'obtient pas toujours la même réponse
-    // const vector1 = await vectorizeQuery("requete");
-    // expect(vector1).toMatchSnapshot();
-    // const vector2 = await vectorizeQuery("la requête");
-    // expect(vector2).toEqual(vector1);
-  },
-  timeout
-);
-test(
-  "Should fail when no content passed",
-  async () => {
-    await expect(vectorizeQuery()).rejects.toThrow(
-      new Error("Cannot vectorize empty query.")
-    );
-  },
-  timeout
-);
+const { preprocess } = require("./index");
 test("Should preprocess text", async () => {
   expect(preprocess("à la nôtre")).toEqual("");